# Load the indicator-encoded German credit data. stringsAsFactors = TRUE keeps
# the target column `Class` a factor on R >= 4.0, where read.csv() no longer
# converts strings to factors by default.
ml_credit_dataset <- read.csv("ml_credit_dataset.csv", stringsAsFactors = TRUE)
# Inspect column names and types.
str(ml_credit_dataset)
## 'data.frame': 1000 obs. of 87 variables:
## $ CheckingAccountStatus.0.to.200 : int 0 1 0 0 0 0 0 1 0 1 ...
## $ CheckingAccountStatus.gt.200 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CheckingAccountStatus.lt.0 : int 1 0 0 1 1 0 0 0 0 0 ...
## $ CheckingAccountStatus.none : int 0 0 1 0 0 1 1 0 1 0 ...
## $ Duration.0.to.6 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ Duration.6.to.12 : int 0 0 1 0 0 0 0 0 1 0 ...
## $ Duration.12.to.18 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Duration.18.to.24 : int 0 0 0 0 1 0 1 0 0 0 ...
## $ Duration.24.to.30 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Duration.30.to.36 : int 0 0 0 0 0 1 0 1 0 0 ...
## $ Duration.36.to.42 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ Duration.42.to.48 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ Duration.48.to.54 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Duration.54.to.60 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Duration.66.to.72 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CreditHistory.Critical : int 1 0 1 0 0 0 0 0 0 1 ...
## $ CreditHistory.Delay : int 0 0 0 0 1 0 0 0 0 0 ...
## $ CreditHistory.NoCredit.AllPaid : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CreditHistory.PaidDuly : int 0 1 0 1 0 1 1 1 1 0 ...
## $ CreditHistory.ThisBank.AllPaid : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Business : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.DomesticAppliance : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Education : int 0 0 1 0 0 1 0 0 0 0 ...
## $ Purpose.Furniture.Equipment : int 0 0 0 1 0 0 1 0 0 0 ...
## $ Purpose.NewCar : int 0 0 0 0 1 0 0 0 0 1 ...
## $ Purpose.Others : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Radio.Television : int 1 1 0 0 0 0 0 0 1 0 ...
## $ Purpose.Repairs : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Retraining : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.UsedCar : int 0 0 0 0 0 0 0 1 0 0 ...
## $ SavingsAccountBonds.100.to.500 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ SavingsAccountBonds.500.to.1000 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ SavingsAccountBonds.gt.1000 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ SavingsAccountBonds.lt.100 : int 0 1 1 1 1 0 0 1 0 1 ...
## $ SavingsAccountBonds.Unknown : int 1 0 0 0 0 1 0 0 0 0 ...
## $ EmploymentDuration.0.to.1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ EmploymentDuration.1.to.4 : int 0 1 0 0 1 1 0 1 0 0 ...
## $ EmploymentDuration.4.to.7 : int 0 0 1 1 0 0 0 0 1 0 ...
## $ EmploymentDuration.gt.7 : int 1 0 0 0 0 0 1 0 0 0 ...
## $ EmploymentDuration.Unemployed : int 0 0 0 0 0 0 0 0 0 1 ...
## $ InstallmentRatePercentage.1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ InstallmentRatePercentage.2 : int 0 1 1 1 0 1 0 1 1 0 ...
## $ InstallmentRatePercentage.3 : int 0 0 0 0 1 0 1 0 0 0 ...
## $ InstallmentRatePercentage.4 : int 1 0 0 0 0 0 0 0 0 1 ...
## $ Personal.Female.NotSingle : int 0 1 0 0 0 0 0 0 0 0 ...
## $ Personal.Male.Divorced.Seperated : int 0 0 0 0 0 0 0 0 1 0 ...
## $ Personal.Male.Married.Widowed : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Personal.Male.Single : int 1 0 1 1 1 1 1 1 0 0 ...
## $ OtherDebtorsGuarantors.CoApplicant : int 0 0 0 0 0 0 0 0 0 0 ...
## $ OtherDebtorsGuarantors.Guarantor : int 0 0 0 1 0 0 0 0 0 0 ...
## $ OtherDebtorsGuarantors.None : int 1 1 1 0 1 1 1 1 1 1 ...
## $ ResidenceDuration.1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ResidenceDuration.2 : int 0 1 0 0 0 0 0 1 0 1 ...
## $ ResidenceDuration.3 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ ResidenceDuration.4 : int 1 0 0 1 1 1 1 0 1 0 ...
## $ Property.CarOther : int 0 0 0 0 0 0 0 1 0 1 ...
## $ Property.Insurance : int 0 0 0 1 0 0 1 0 0 0 ...
## $ Property.RealEstate : int 1 1 1 0 0 0 0 0 1 0 ...
## $ Property.Unknown : int 0 0 0 0 1 1 0 0 0 0 ...
## $ Age.18.to.24 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ Age.24.to.30 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Age.30.to.36 : int 0 0 0 0 0 1 0 1 0 0 ...
## $ Age.36.to.42 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Age.42.to.48 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ Age.48.to.54 : int 0 0 1 0 1 0 1 0 0 0 ...
## $ Age.54.to.60 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Age.60.to.66 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ Age.66.to.72 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ Age.72.to.78 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ OtherInstallmentPlans.Bank : int 0 0 0 0 0 0 0 0 0 0 ...
## $ OtherInstallmentPlans.None : int 1 1 1 1 1 1 1 1 1 1 ...
## $ OtherInstallmentPlans.Stores : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Housing.ForFree : int 0 0 0 1 1 1 0 0 0 0 ...
## $ Housing.Own : int 1 1 1 0 0 0 1 0 1 1 ...
## $ Housing.Rent : int 0 0 0 0 0 0 0 1 0 0 ...
## $ NumberExistingCredits.1 : int 0 1 1 1 0 1 1 1 1 0 ...
## $ NumberExistingCredits.2 : int 1 0 0 0 1 0 0 0 0 1 ...
## $ NumberExistingCredits.3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NumberExistingCredits.4 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Job.Management.SelfEmp.HighlyQualified: int 0 0 0 0 0 0 0 1 0 1 ...
## $ Job.SkilledEmployee : int 1 1 0 1 1 0 1 0 0 0 ...
## $ Job.UnemployedUnskilled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Job.UnskilledResident : int 0 0 1 0 0 1 0 0 1 0 ...
## $ NumberPeopleMaintenance : int 1 1 2 2 2 2 1 1 1 1 ...
## $ Telephone : int 1 0 0 0 0 1 0 1 0 0 ...
## $ ForeignWorker : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Class : Factor w/ 2 levels "Bad","Good": 2 1 2 2 1 2 2 2 2 1 ...
library(mlr)
## Loading required package: ParamHelpers
# Wrap the data in an mlr classification task with `Class` as the target,
# then drop any features that are constant across the observations.
credit.task <- removeConstantFeatures(
  makeClassifTask(data = ml_credit_dataset, target = "Class")
)
credit.task
## Supervised task: ml_credit_dataset
## Type: classif
## Target: Class
## Observations: 1000
## Features:
## numerics factors ordered functionals
## 86 0 0 0
## Missings: FALSE
## Has weights: FALSE
## Has blocking: FALSE
## Has coordinates: FALSE
## Classes: 2
## Bad Good
## 300 700
## Positive class: Bad
-In regular classification the aim is to minimize the misclassification rate and thus all types of misclassification errors are deemed equally severe.
-A more general setting is cost-sensitive classification where the costs caused by different kinds of errors are not assumed to be equal and the objective is to minimize the expected costs.
-In case of class-dependent costs the costs depend on the true and predicted class label. The costs c(k,l) for predicting class k if the true label is l are usually organized into a K×K cost matrix where K is the number of classes.
-Naturally, it is assumed that the cost of predicting the correct class label y is minimal (that is c(y,y)≤c(k,y) for all k=1,…,K).
-There are some classification methods that can accommodate misclassification costs directly. One example is rpart.
-Alternatively, we can use cost-insensitive methods and manipulate the predictions or the training data in order to take misclassification costs into account. mlr supports \(\textbf{ thresholding }\) and \(\textbf{ rebalancing }\).
-\(\textbf{Thresholding: }\) The thresholds used to turn posterior probabilities into class labels, are chosen such that the costs are minimized. This requires a Learner that can predict posterior probabilities. During training the costs are not taken into account.
-\(\textbf{Rebalancing: }\) The idea is to change the proportion of the classes in the training data set in order to account for costs during training, either by \(\textit{weighting}\) or by \(\textit{sampling}\). Rebalancing does not require that the Learner can predict probabilities.
—– For weighting we need a Learner that supports class weights or observation weights.
—– If the Learner cannot deal with weights the proportion of classes can be changed by over- and undersampling.
Cost Matrix for German Credit Data
# 2x2 misclassification cost matrix, labelled on both dimensions with the
# task's class levels (filled column by column).
costs <- matrix(
  c(0, 1, 5, 0),
  nrow = 2,
  dimnames = rep(list(getTaskClassLevels(credit.task)), 2)
)
costs
## Bad Good
## Bad 0 5
## Good 1 0
So the maximum cost is 5 and the minimum is 0: misclassifying a truly “Bad” case as “Good” costs 5, whereas misclassifying a truly “Good” case as “Bad” costs only 1.
We start by fitting a logistic regression model to the German credit data set and predict posterior probabilities.
# Multinomial learner configured to return posterior class probabilities.
logisticLrn <- makeLearner(cl = "classif.multinom", predict.type = "prob")
# Fit on the full task.
logisticModel <- mlr::train(learner = logisticLrn, task = credit.task)
## # weights: 88 (87 variable)
## initial value 693.147181
## iter 10 value 472.774156
## iter 20 value 445.997827
## iter 30 value 444.374321
## iter 40 value 444.223040
## iter 50 value 444.158378
## iter 60 value 444.117755
## iter 70 value 444.107639
## iter 80 value 444.106620
## final value 444.106579
## converged
# Predict on the same task the model was trained on.
logisticpred <- predict(object = logisticModel, task = credit.task)
logisticpred
## Prediction: 1000 observations
## predict.type: prob
## threshold: Bad=0.50,Good=0.50
## time: 0.01
## id truth prob.Bad prob.Good response
## 1 1 Good 0.02001323 0.9799868 Good
## 2 2 Bad 0.74111232 0.2588877 Bad
## 3 3 Good 0.03363280 0.9663672 Good
## 4 4 Good 0.10402736 0.8959726 Good
## 5 5 Bad 0.67594919 0.3240508 Bad
## 6 6 Good 0.18333223 0.8166678 Good
## ... (#rows: 1000, #cols: 5)
We also fit the data with the C50 algorithm.
# Same workflow with a C5.0 learner: build, train on the full task, then
# predict posterior probabilities on that same task.
c50Lrn <- makeLearner(cl = "classif.C50", predict.type = "prob")
c50Model <- mlr::train(learner = c50Lrn, task = credit.task)
c50pred <- predict(object = c50Model, task = credit.task)
c50pred
## Prediction: 1000 observations
## predict.type: prob
## threshold: Bad=0.50,Good=0.50
## time: 0.21
## id truth prob.Bad prob.Good response
## 1 1 Good 0.06571429 0.9342857 Good
## 2 2 Bad 0.88750000 0.1125000 Bad
## 3 3 Good 0.08534799 0.9146520 Good
## 4 4 Good 0.04193549 0.9580645 Good
## 5 5 Bad 0.17916667 0.8208333 Good
## 6 6 Good 0.01666667 0.9833333 Good
## ... (#rows: 1000, #cols: 5)
The default thresholds for both classes are 0.5. But according to the cost matrix we should predict class Good only if we are very sure that Good is indeed the correct label. Therefore we should increase the threshold for class Good and decrease the threshold for class Bad.
The theoretical threshold for the positive class in the two-class case can be calculated from the cost matrix as: \(t^* = \frac{c(+1,-1)-c(-1,-1)}{c(+1,-1)-c(+1,+1)+c(-1,+1)-c(-1,-1)}\) This formula follows from the requirement that the expected cost of predicting class +1 must not exceed the expected cost of predicting class -1: \(P(j=-1|x)c(+1,-1) + P(j=+1|x)c(+1,+1) \leq P(j=-1|x)c(-1,-1) + P(j=+1|x)c(-1,+1)\) Writing \(p = P(j=+1|x)\), the threshold \(t^*\) is the value of \(p\) at which both sides are equal: \((1-t^*)c(+1,-1)+t^*c(+1,+1) = (1-t^*)c(-1,-1)+t^*c(-1,+1)\)
Calculate the theoretical threshold for the positive class: Since c(+1,+1)=c(-1,-1)=0
# With zero costs on the diagonal the threshold reduces to the ratio of the
# two off-diagonal costs; the cells are addressed by class name.
th <- costs["Good", "Bad"] / (costs["Good", "Bad"] + costs["Bad", "Good"])
th
## [1] 0.1666667
-You can change thresholds in mlr either before training by using the “predict.threshold” option of makeLearner or after prediction by calling setThreshold on the Prediction object.
-Predict class labels according to the theoretical threshold
# Re-label the logistic predictions using the theoretical threshold.
logisticpred.th <- setThreshold(pred = logisticpred, threshold = th)
logisticpred.th
## Prediction: 1000 observations
## predict.type: prob
## threshold: Bad=0.17,Good=0.83
## time: 0.01
## id truth prob.Bad prob.Good response
## 1 1 Good 0.02001323 0.9799868 Good
## 2 2 Bad 0.74111232 0.2588877 Bad
## 3 3 Good 0.03363280 0.9663672 Good
## 4 4 Good 0.10402736 0.8959726 Good
## 5 5 Bad 0.67594919 0.3240508 Bad
## 6 6 Good 0.18333223 0.8166678 Bad
## ... (#rows: 1000, #cols: 5)
# Re-label the C5.0 predictions using the theoretical threshold.
c50pred.th <- setThreshold(pred = c50pred, threshold = th)
c50pred.th
## Prediction: 1000 observations
## predict.type: prob
## threshold: Bad=0.17,Good=0.83
## time: 0.21
## id truth prob.Bad prob.Good response
## 1 1 Good 0.06571429 0.9342857 Good
## 2 2 Bad 0.88750000 0.1125000 Bad
## 3 3 Good 0.08534799 0.9146520 Good
## 4 4 Good 0.04193549 0.9580645 Good
## 5 5 Bad 0.17916667 0.8208333 Bad
## 6 6 Good 0.01666667 0.9833333 Good
## ... (#rows: 1000, #cols: 5)
In order to calculate the average costs over the entire data set we first need to create a new performance Measure. This can be done through function makeCostMeasure. It is expected that the rows of the cost matrix indicate true and the columns predicted class labels.
# Performance measure built from the cost matrix, with the best and worst
# attainable values declared explicitly.
credit.costs <- makeCostMeasure(
  id = "credit.costs",
  name = "Credit costs",
  costs = costs,
  best = 0,
  worst = 5
)
credit.costs
## Name: Credit costs
## Performance measure: credit.costs
## Properties: classif,classif.multi,req.pred,req.truth,predtype.response,predtype.prob
## Minimize: TRUE
## Best: 0; Worst: 5
## Aggregated by: test.mean
## Arguments: costs=<matrix>, combine=<function>
## Note:
Then the average costs can be computed by function performance. Below we compare the average costs and the error rate (mmce) of the learning algorithm with both default thresholds 0.5 and theoretical thresholds.
Performance with default thresholds 0.5
# Average cost and error rate of the logistic model at the default threshold.
performance(pred = logisticpred, measures = list(credit.costs, mmce))
## credit.costs mmce
## 0.735 0.207
# Absolute and relative confusion matrices for the logistic model.
calculateConfusionMatrix(pred = logisticpred, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
## predicted
## true Bad Good -err.-
## Bad 0.56/0.69 0.44/0.17 0.44
## Good 0.11/0.31 0.89/0.83 0.11
## -err.- 0.31 0.17 0.21
##
##
## Absolute confusion matrix:
## predicted
## true Bad Good -err.-
## Bad 168 132 132
## Good 75 625 75
## -err.- 75 132 207
# Average cost and error rate of the C5.0 model at the default threshold.
performance(pred = c50pred, measures = list(credit.costs, mmce))
## credit.costs mmce
## 0.286 0.078
# Absolute and relative confusion matrices for the C5.0 model.
calculateConfusionMatrix(pred = c50pred, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
## predicted
## true Bad Good -err.-
## Bad 0.83/0.91 0.17/0.07 0.17
## Good 0.04/0.09 0.96/0.93 0.04
## -err.- 0.09 0.07 0.08
##
##
## Absolute confusion matrix:
## predicted
## true Bad Good -err.-
## Bad 248 52 52
## Good 26 674 26
## -err.- 26 52 78
Performance with theoretical thresholds
# Average cost and error rate of the logistic model at the theoretical threshold.
performance(pred = logisticpred.th, measures = list(credit.costs, mmce))
## credit.costs mmce
## 0.459 0.339
# Confusion matrices for the thresholded logistic predictions.
calculateConfusionMatrix(pred = logisticpred.th, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
## predicted
## true Bad Good -err.-
## Bad 0.90/0.47 0.10/0.07 0.10
## Good 0.44/0.53 0.56/0.93 0.44
## -err.- 0.53 0.07 0.34
##
##
## Absolute confusion matrix:
## predicted
## true Bad Good -err.-
## Bad 270 30 30
## Good 309 391 309
## -err.- 309 30 339
# Average cost and error rate of the C5.0 model at the theoretical threshold.
performance(pred = c50pred.th, measures = list(credit.costs, mmce))
## credit.costs mmce
## 0.263 0.127
# Confusion matrices for the thresholded C5.0 predictions.
calculateConfusionMatrix(pred = c50pred.th, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
## predicted
## true Bad Good -err.-
## Bad 0.89/0.74 0.11/0.05 0.11
## Good 0.13/0.26 0.87/0.95 0.13
## -err.- 0.26 0.05 0.13
##
##
## Absolute confusion matrix:
## predicted
## true Bad Good -err.-
## Bad 266 34 34
## Good 93 607 93
## -err.- 93 34 127
These performance values may be overly optimistic as we used the same data set for training and prediction, and resampling strategies should be preferred.
Cross-validated performance with theoretical thresholds
# Fix the RNG seed so the fold assignment, and every figure derived from it,
# is reproducible between runs (re-knit to refresh the printed results).
set.seed(123)
# One stratified 5-fold CV instance (rin), reused by later chunks so that
# their performance values are comparable.
rin <- makeResampleInstance("CV", iters = 5, task = credit.task, stratify = TRUE)
# The theoretical threshold is applied at prediction time; trace = FALSE
# silences the optimizer log that the untraced fit prints.
logisticLrn <- makeLearner("classif.multinom", predict.type = "prob",
  predict.threshold = th, trace = FALSE)
logisticR <- resample(logisticLrn, credit.task, resampling = rin,
  measures = list(credit.costs, mmce), show.info = FALSE)
logisticR
## Resample Result
## Task: ml_credit_dataset
## Learner: classif.multinom
## Aggr perf: credit.costs.test.mean=0.5660000,mmce.test.mean=0.3700000
## Runtime: 0.729179
# Confusion matrix pooled over the cross-validation test folds.
calculateConfusionMatrix(pred = logisticR$pred)
## predicted
## true Bad Good -err.-
## Bad 251 49 49
## Good 321 379 321
## -err.- 321 49 370
# Evaluate C5.0 on the same stratified 5-fold instance (rin) as the logistic
# model, so both learners are scored on identical train/test splits. A separate
# 2-fold instance would train on smaller subsets and make the two sets of
# results non-comparable. c50rin is kept as an alias for existing references.
# (Re-knit to refresh the printed results.)
c50rin <- rin
c50Lrn <- makeLearner("classif.C50", predict.type = "prob", predict.threshold = th)
c50R <- resample(c50Lrn, credit.task, resampling = c50rin,
  measures = list(credit.costs, mmce), show.info = FALSE)
c50R
## Resample Result
## Task: ml_credit_dataset
## Learner: classif.C50
## Aggr perf: credit.costs.test.mean=0.8060000,mmce.test.mean=0.3220000
## Runtime: 0.574812
# Confusion matrix pooled over the C5.0 cross-validation test folds.
calculateConfusionMatrix(pred = c50R$pred)
## predicted
## true Bad Good -err.-
## Bad 179 121 121
## Good 201 499 201
## -err.- 201 121 322
If we are also interested in the cross-validated performance for the default threshold values we can call setThreshold on the prediction stored in the resample result (e.g. logisticR$pred).
Cross-validated performance with default thresholds
# Cross-validated logistic predictions re-labelled at the default 0.5 threshold.
performance(
  pred = setThreshold(pred = logisticR$pred, threshold = 0.5),
  measures = list(credit.costs, mmce)
)
## credit.costs mmce
## 0.888 0.260
# Confusion matrix for the logistic CV predictions at threshold 0.5.
calculateConfusionMatrix(pred = setThreshold(pred = logisticR$pred, threshold = 0.5))
## predicted
## true Bad Good -err.-
## Bad 143 157 157
## Good 103 597 103
## -err.- 103 157 260
# Cross-validated C5.0 predictions re-labelled at the default 0.5 threshold.
performance(
  pred = setThreshold(pred = c50R$pred, threshold = 0.5),
  measures = list(credit.costs, mmce)
)
## credit.costs mmce
## 0.940 0.288
# Confusion matrix for the C5.0 CV predictions at threshold 0.5.
calculateConfusionMatrix(pred = setThreshold(pred = c50R$pred, threshold = 0.5))
## predicted
## true Bad Good -err.-
## Bad 137 163 163
## Good 125 575 125
## -err.- 125 163 288
-Useful in this regard is function “plotThreshVsPerf” that you can use to plot the average costs as well as any other performance measure versus possible threshold values for the positive class in [0,1]. The underlying data is generated by “generateThreshVsPerfData”.
-The following plots show the cross-validated costs and error rate (mmce). The theoretical threshold th calculated above is indicated by the vertical line. As you can see from the left-hand plot the theoretical threshold seems a bit large.
The vertical line marks the theoretical threshold value.
# Threshold-vs-performance data for the cross-validated logistic model.
ld <- generateThreshVsPerfData(
  obj = logisticR,
  measures = list(fpr, tpr, credit.costs, mmce)
)
# Plot each measure against the threshold, marking the theoretical value.
plotThreshVsPerf(obj = ld, mark.th = th)
plotROCCurves(obj = ld)
# Average cost at the threshold used during resampling.
performance(pred = logisticR$pred, measures = credit.costs)
## credit.costs
## 0.566
# Threshold-vs-performance data for the cross-validated C5.0 model.
cd <- generateThreshVsPerfData(
  obj = c50R,
  measures = list(fpr, tpr, credit.costs, mmce)
)
# Plot each measure against the threshold, marking the theoretical value.
plotThreshVsPerf(obj = cd, mark.th = th)
plotROCCurves(obj = cd)
# Average cost at the threshold used during resampling.
performance(pred = c50R$pred, measures = credit.costs)
## credit.costs
## 0.806
# Learning curves (credit costs and mmce versus training-set fraction) for
# several learners, evaluated on the shared CV instance rin.
# show.info = FALSE does not silence the optimizer trace that the multinom and
# nnet learners print for every fit, so those two are built with trace = FALSE
# to keep the output readable. Learner ids default to the class names, so the
# result is labelled exactly as when plain strings are passed.
r <- generateLearningCurveData(
  learners = list(
    makeLearner("classif.multinom", trace = FALSE),
    makeLearner("classif.C50"),
    makeLearner("classif.randomForest"),
    makeLearner("classif.binomial"),
    makeLearner("classif.naiveBayes"),
    makeLearner("classif.nnet", trace = FALSE),
    makeLearner("classif.rpart")
  ),
  task = credit.task,
  percs = seq(0.1, 1, by = 0.2),
  measures = list(credit.costs, mmce),
  resampling = rin,
  show.info = FALSE
)
## # weights: 88 (87 variable)
## initial value 55.451774
## iter 10 value 2.595330
## iter 20 value 0.005183
## final value 0.000088
## converged
## # weights: 88 (87 variable)
## initial value 55.451774
## iter 10 value 2.111343
## iter 20 value 0.007316
## final value 0.000078
## converged
## # weights: 88 (87 variable)
## initial value 55.451774
## iter 10 value 2.493589
## iter 20 value 0.008056
## final value 0.000081
## converged
## # weights: 88 (87 variable)
## initial value 55.451774
## iter 10 value 1.786255
## iter 20 value 0.006746
## final value 0.000080
## converged
## # weights: 88 (87 variable)
## initial value 55.451774
## iter 10 value 4.290061
## iter 20 value 0.007314
## final value 0.000064
## converged
## # weights: 88 (87 variable)
## initial value 166.355323
## iter 10 value 85.045936
## iter 20 value 76.476373
## iter 30 value 75.943140
## iter 40 value 75.716240
## iter 50 value 75.644182
## iter 60 value 75.629615
## iter 70 value 75.627644
## final value 75.627597
## converged
## # weights: 88 (87 variable)
## initial value 166.355323
## iter 10 value 62.018850
## iter 20 value 49.680962
## iter 30 value 47.926942
## iter 40 value 47.549319
## iter 50 value 47.469005
## iter 60 value 47.430395
## iter 70 value 47.420722
## iter 80 value 47.418688
## iter 90 value 47.416840
## iter 100 value 47.416736
## final value 47.416736
## stopped after 100 iterations
## # weights: 88 (87 variable)
## initial value 166.355323
## iter 10 value 71.873951
## iter 20 value 62.827414
## iter 30 value 62.194664
## iter 40 value 62.012860
## iter 50 value 61.965581
## iter 60 value 61.958452
## iter 70 value 61.958101
## iter 80 value 61.958043
## final value 61.957939
## converged
## # weights: 88 (87 variable)
## initial value 166.355323
## iter 10 value 73.976035
## iter 20 value 63.952094
## iter 30 value 63.392570
## iter 40 value 63.256136
## iter 50 value 63.214894
## iter 60 value 63.212312
## iter 70 value 63.212125
## final value 63.212122
## converged
## # weights: 88 (87 variable)
## initial value 166.355323
## iter 10 value 87.967087
## iter 20 value 82.995922
## iter 30 value 82.745730
## iter 40 value 82.647945
## iter 50 value 82.605405
## iter 60 value 82.600825
## iter 70 value 82.600620
## iter 70 value 82.600619
## iter 70 value 82.600619
## final value 82.600619
## converged
## # weights: 88 (87 variable)
## initial value 277.258872
## iter 10 value 163.703357
## iter 20 value 158.546805
## iter 30 value 158.231962
## iter 40 value 158.133812
## iter 50 value 158.125388
## final value 158.123948
## converged
## # weights: 88 (87 variable)
## initial value 277.258872
## iter 10 value 163.704049
## iter 20 value 155.268004
## iter 30 value 153.848808
## iter 40 value 153.588908
## iter 50 value 153.561463
## iter 60 value 153.556733
## iter 70 value 153.555600
## final value 153.555576
## converged
## # weights: 88 (87 variable)
## initial value 277.258872
## iter 10 value 153.719247
## iter 20 value 149.269071
## iter 30 value 148.855430
## iter 40 value 148.714222
## iter 50 value 148.700141
## iter 60 value 148.695222
## iter 70 value 148.693547
## final value 148.693490
## converged
## # weights: 88 (87 variable)
## initial value 277.258872
## iter 10 value 168.622209
## iter 20 value 164.826453
## iter 30 value 164.546844
## iter 40 value 164.440610
## iter 50 value 164.415808
## iter 60 value 164.413710
## iter 70 value 164.413326
## final value 164.413321
## converged
## # weights: 88 (87 variable)
## initial value 277.258872
## iter 10 value 153.079565
## iter 20 value 145.537838
## iter 30 value 145.104535
## iter 40 value 144.897361
## iter 50 value 144.873406
## iter 60 value 144.866377
## iter 70 value 144.865848
## final value 144.865842
## converged
## # weights: 88 (87 variable)
## initial value 388.162421
## iter 10 value 232.671690
## iter 20 value 226.245185
## iter 30 value 225.548771
## iter 40 value 225.373536
## iter 50 value 225.305203
## iter 60 value 225.296778
## final value 225.296200
## converged
## # weights: 88 (87 variable)
## initial value 388.162421
## iter 10 value 229.865069
## iter 20 value 222.176627
## iter 30 value 221.064701
## iter 40 value 220.881905
## iter 50 value 220.834746
## iter 60 value 220.823974
## final value 220.823275
## converged
## # weights: 88 (87 variable)
## initial value 388.162421
## iter 10 value 237.382713
## iter 20 value 227.797990
## iter 30 value 226.868004
## iter 40 value 226.774465
## iter 50 value 226.717161
## iter 60 value 226.710949
## iter 70 value 226.710428
## final value 226.710404
## converged
## # weights: 88 (87 variable)
## initial value 388.162421
## iter 10 value 242.284256
## iter 20 value 231.905918
## iter 30 value 230.539509
## iter 40 value 230.288691
## iter 50 value 230.184873
## iter 60 value 230.173845
## iter 70 value 230.172438
## final value 230.172324
## converged
## # weights: 88 (87 variable)
## initial value 388.162421
## iter 10 value 229.317219
## iter 20 value 223.127547
## iter 30 value 222.465732
## iter 40 value 222.429422
## iter 50 value 222.393899
## iter 60 value 222.390171
## final value 222.389857
## converged
## # weights: 88 (87 variable)
## initial value 499.065970
## iter 10 value 341.133368
## iter 20 value 319.046761
## iter 30 value 316.516863
## iter 40 value 316.288710
## iter 50 value 316.190248
## iter 60 value 316.146376
## iter 70 value 316.143165
## iter 80 value 316.142870
## final value 316.142863
## converged
## # weights: 88 (87 variable)
## initial value 499.065970
## iter 10 value 333.896303
## iter 20 value 311.426952
## iter 30 value 309.388689
## iter 40 value 309.024675
## iter 50 value 308.890051
## iter 60 value 308.854322
## iter 70 value 308.851498
## final value 308.851369
## converged
## # weights: 88 (87 variable)
## initial value 499.065970
## iter 10 value 340.427988
## iter 20 value 317.845775
## iter 30 value 315.646845
## iter 40 value 315.412784
## iter 50 value 315.347898
## iter 60 value 315.323141
## iter 70 value 315.321363
## final value 315.321224
## converged
## # weights: 88 (87 variable)
## initial value 499.065970
## iter 10 value 329.442532
## iter 20 value 302.833196
## iter 30 value 300.515772
## iter 40 value 300.208000
## iter 50 value 300.137908
## iter 60 value 300.103707
## iter 70 value 300.099614
## iter 80 value 300.099290
## iter 80 value 300.099289
## iter 80 value 300.099289
## final value 300.099289
## converged
## # weights: 88 (87 variable)
## initial value 499.065970
## iter 10 value 336.441609
## iter 20 value 312.196692
## iter 30 value 309.702533
## iter 40 value 309.488374
## iter 50 value 309.425159
## iter 60 value 309.405159
## iter 70 value 309.403762
## iter 80 value 309.403626
## final value 309.403622
## converged
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## # weights: 265
## initial value 47.417223
## iter 10 value 16.666789
## iter 20 value 5.496330
## iter 30 value 5.169194
## iter 40 value 5.001437
## iter 50 value 4.354381
## iter 60 value 2.236821
## iter 70 value 1.928299
## iter 80 value 1.917043
## iter 90 value 1.913647
## iter 100 value 1.912184
## final value 1.912184
## stopped after 100 iterations
## # weights: 265
## initial value 54.094845
## iter 10 value 18.303115
## iter 20 value 2.970236
## iter 30 value 0.006188
## final value 0.000065
## converged
## # weights: 265
## initial value 49.163708
## iter 10 value 12.850215
## iter 20 value 4.843500
## iter 30 value 4.781442
## iter 40 value 4.773044
## iter 50 value 4.770690
## iter 60 value 4.769070
## iter 70 value 4.768288
## iter 80 value 4.758711
## iter 90 value 4.507325
## iter 100 value 4.499915
## final value 4.499915
## stopped after 100 iterations
## # weights: 265
## initial value 58.264257
## iter 10 value 10.056085
## iter 20 value 0.071013
## iter 30 value 0.000291
## final value 0.000051
## converged
## # weights: 265
## initial value 56.573744
## iter 10 value 20.292272
## iter 20 value 10.819008
## iter 30 value 7.194146
## iter 40 value 2.192129
## iter 50 value 1.936640
## iter 60 value 1.911627
## iter 70 value 1.908690
## iter 80 value 1.903356
## iter 90 value 1.701907
## iter 100 value 1.390541
## final value 1.390541
## stopped after 100 iterations
## # weights: 265
## initial value 166.758949
## iter 10 value 93.146386
## iter 20 value 61.390655
## iter 30 value 56.580356
## iter 40 value 43.840075
## iter 50 value 37.057954
## iter 60 value 31.931312
## iter 70 value 31.571029
## iter 80 value 30.940648
## iter 90 value 28.339326
## iter 100 value 28.079109
## final value 28.079109
## stopped after 100 iterations
## # weights: 265
## initial value 175.601592
## iter 10 value 95.379915
## iter 20 value 39.441248
## iter 30 value 27.512617
## iter 40 value 25.511757
## iter 50 value 25.191820
## iter 60 value 24.962938
## iter 70 value 24.952808
## iter 80 value 24.950836
## iter 90 value 24.950499
## iter 100 value 24.926758
## final value 24.926758
## stopped after 100 iterations
## # weights: 265
## initial value 179.274000
## iter 10 value 56.504690
## iter 20 value 21.849506
## iter 30 value 14.227004
## iter 40 value 10.192803
## iter 50 value 9.797399
## iter 60 value 9.731449
## iter 70 value 9.722317
## iter 80 value 9.712765
## iter 90 value 9.668185
## iter 100 value 9.660674
## final value 9.660674
## stopped after 100 iterations
## # weights: 265
## initial value 153.032701
## iter 10 value 79.884552
## iter 20 value 54.592610
## iter 30 value 44.276020
## iter 40 value 42.846703
## iter 50 value 42.479299
## iter 60 value 41.546879
## iter 70 value 40.060629
## iter 80 value 36.414242
## iter 90 value 34.458801
## iter 100 value 34.339338
## final value 34.339338
## stopped after 100 iterations
## # weights: 265
## initial value 207.574535
## iter 10 value 88.249655
## iter 20 value 38.606332
## iter 30 value 27.138413
## iter 40 value 24.156102
## iter 50 value 23.331284
## iter 60 value 23.014699
## iter 70 value 22.102622
## iter 80 value 20.963154
## iter 90 value 20.829897
## iter 100 value 20.676576
## final value 20.676576
## stopped after 100 iterations
## # weights: 265
## initial value 300.166307
## iter 10 value 173.796700
## iter 20 value 106.012557
## iter 30 value 90.610795
## iter 40 value 76.186401
## iter 50 value 70.014890
## iter 60 value 68.732235
## iter 70 value 67.643890
## iter 80 value 67.594619
## iter 90 value 66.483072
## iter 100 value 66.451511
## final value 66.451511
## stopped after 100 iterations
## # weights: 265
## initial value 265.157893
## iter 10 value 182.393901
## iter 20 value 147.944842
## iter 30 value 134.409924
## iter 40 value 132.459430
## iter 50 value 132.422239
## iter 60 value 132.413803
## final value 132.413792
## converged
## # weights: 265
## initial value 281.004387
## iter 10 value 201.033977
## iter 20 value 115.481566
## iter 30 value 55.189431
## iter 40 value 45.676035
## iter 50 value 42.482631
## iter 60 value 41.334594
## iter 70 value 41.313745
## iter 80 value 41.307054
## iter 90 value 41.303210
## iter 100 value 41.300838
## final value 41.300838
## stopped after 100 iterations
## # weights: 265
## initial value 253.070087
## iter 10 value 154.068553
## iter 20 value 93.950672
## iter 30 value 67.440121
## iter 40 value 61.194147
## iter 50 value 60.381334
## iter 60 value 60.290531
## iter 70 value 59.749023
## iter 80 value 59.747795
## iter 90 value 59.747274
## iter 100 value 59.746867
## final value 59.746867
## stopped after 100 iterations
## # weights: 265
## initial value 249.085722
## iter 10 value 155.923332
## iter 20 value 118.489248
## iter 30 value 81.773268
## iter 40 value 73.483015
## iter 50 value 73.000214
## iter 60 value 72.990847
## iter 70 value 72.990305
## iter 80 value 72.990130
## iter 80 value 72.990130
## iter 80 value 72.990130
## final value 72.990130
## converged
## # weights: 265
## initial value 666.922034
## iter 10 value 340.896414
## iter 20 value 278.640237
## iter 30 value 211.315791
## iter 40 value 184.782626
## iter 50 value 173.505253
## iter 60 value 162.701404
## iter 70 value 140.900928
## iter 80 value 128.899376
## iter 90 value 120.989051
## iter 100 value 119.248707
## final value 119.248707
## stopped after 100 iterations
## # weights: 265
## initial value 529.705071
## final value 336.846094
## converged
## # weights: 265
## initial value 335.420843
## iter 10 value 228.272793
## iter 20 value 148.578873
## iter 30 value 123.138985
## iter 40 value 112.790077
## iter 50 value 110.264495
## iter 60 value 104.839059
## iter 70 value 103.712254
## iter 80 value 102.814911
## iter 90 value 102.787832
## iter 100 value 102.766672
## final value 102.766672
## stopped after 100 iterations
## # weights: 265
## initial value 447.015224
## iter 10 value 234.209031
## iter 20 value 160.351871
## iter 30 value 142.315090
## iter 40 value 130.749522
## iter 50 value 126.968136
## iter 60 value 125.639183
## iter 70 value 123.843697
## iter 80 value 123.000002
## iter 90 value 122.639544
## iter 100 value 120.286839
## final value 120.286839
## stopped after 100 iterations
## # weights: 265
## initial value 387.349223
## final value 343.761636
## converged
## # weights: 265
## initial value 582.077642
## iter 10 value 426.629330
## iter 20 value 376.941453
## iter 30 value 346.857895
## iter 40 value 326.376557
## iter 50 value 319.192690
## iter 60 value 315.600863
## iter 70 value 314.415944
## iter 80 value 313.904408
## iter 90 value 313.554359
## iter 100 value 313.476216
## final value 313.476216
## stopped after 100 iterations
## # weights: 265
## initial value 618.452165
## iter 10 value 351.196348
## iter 20 value 289.950104
## iter 30 value 238.129824
## iter 40 value 200.232636
## iter 50 value 171.568625
## iter 60 value 162.672175
## iter 70 value 157.947567
## iter 80 value 156.797795
## iter 90 value 155.533260
## iter 100 value 155.508830
## final value 155.508830
## stopped after 100 iterations
## # weights: 265
## initial value 546.603317
## iter 10 value 306.636197
## iter 20 value 195.221544
## iter 30 value 158.083791
## iter 40 value 144.054515
## iter 50 value 136.745041
## iter 60 value 135.244195
## iter 70 value 135.042908
## iter 80 value 135.026427
## iter 90 value 135.015925
## iter 100 value 135.006839
## final value 135.006839
## stopped after 100 iterations
## # weights: 265
## initial value 534.145360
## iter 10 value 353.601081
## iter 20 value 291.582245
## iter 30 value 244.551060
## iter 40 value 209.298471
## iter 50 value 168.188663
## iter 60 value 160.280033
## iter 70 value 158.842533
## iter 80 value 158.210762
## iter 90 value 157.220018
## iter 100 value 156.809775
## final value 156.809775
## stopped after 100 iterations
## # weights: 265
## initial value 469.550377
## iter 10 value 339.504710
## iter 20 value 246.806508
## iter 30 value 210.828874
## iter 40 value 189.239498
## iter 50 value 187.934199
## iter 60 value 187.674383
## final value 187.674178
## converged
# Draw the learning curve for `r`; `r` is created outside this section
# (presumably by generateLearningCurveData() — TODO confirm).
plotLearningCurve(r)
# Random forest: probability-predicting learner, resampled with stratified
# 5-fold cross-validation on credit.task and scored by credit.costs and mmce.
Randomlrn <- makeLearner(
  "classif.randomForest",
  predict.type = "prob",
  fix.factors.prediction = TRUE
)
# NOTE(review): a new CV instance is created here (and again for each later
# learner), so the folds may differ between learners — confirm whether a
# seed or a shared instance is intended.
rin <- makeResampleInstance(
  "CV",
  iters = 5,
  task = credit.task,
  stratify = TRUE
)
Ranr <- resample(
  Randomlrn,
  credit.task,
  rin,
  measures = list(credit.costs, mmce),
  show.info = FALSE
)
# Show the resampling summary.
Ranr
## Resample Result
## Task: ml_credit_dataset
## Learner: classif.randomForest
## Aggr perf: credit.costs.test.mean=0.9970000,mmce.test.mean=0.2410000
## Runtime: 13.1616
Prediction based on the theoretical threshold
# Apply the threshold `th` (defined outside this section) to the random
# forest's resampled predictions and tabulate the resulting errors.
Ranpred.th <- setThreshold(Ranr$pred, threshold = th)
calculateConfusionMatrix(Ranpred.th)
## predicted
## true Bad Good -err.-
## Bad 281 19 19
## Good 449 251 449
## -err.- 449 19 468
# credit.costs and mmce of the re-thresholded predictions.
performance(
  Ranpred.th,
  measures = list(credit.costs, mmce)
)
## credit.costs mmce
## 0.544 0.468
Tuning the Threshold
# Threshold-vs-performance data for the random forest resampling result,
# covering fpr, tpr, credit.costs and mmce.
dr <- generateThreshVsPerfData(
  Ranr,
  measures = list(fpr, tpr, credit.costs, mmce)
)
# Performance curves with `th` marked, then the ROC curve.
plotThreshVsPerf(dr, mark.th = th)
plotROCCurves(dr)
# credit.costs of the stored predictions (no setThreshold() applied here).
performance(Ranr$pred, credit.costs)
## credit.costs
## 0.997
# Naive Bayes: probability-predicting learner, resampled with stratified
# 5-fold cross-validation on credit.task and scored by credit.costs and mmce.
NBlrn <- makeLearner(
  "classif.naiveBayes",
  predict.type = "prob",
  fix.factors.prediction = TRUE
)
# `rin` is overwritten with a newly created CV instance.
rin <- makeResampleInstance(
  "CV",
  iters = 5,
  task = credit.task,
  stratify = TRUE
)
NBr <- resample(
  NBlrn,
  credit.task,
  rin,
  measures = list(credit.costs, mmce),
  show.info = FALSE
)
# Show the resampling summary.
NBr
## Resample Result
## Task: ml_credit_dataset
## Learner: classif.naiveBayes
## Aggr perf: credit.costs.test.mean=0.8430000,mmce.test.mean=0.2990000
## Runtime: 1.80011
# Apply the threshold `th` (defined outside this section) to the naive
# Bayes resampled predictions and tabulate the resulting errors.
NBpred.th <- setThreshold(NBr$pred, threshold = th)
calculateConfusionMatrix(NBpred.th)
## predicted
## true Bad Good -err.-
## Bad 186 114 114
## Good 204 496 204
## -err.- 204 114 318
# credit.costs and mmce of the re-thresholded predictions.
performance(
  NBpred.th,
  measures = list(credit.costs, mmce)
)
## credit.costs mmce
## 0.774 0.318
# Threshold-vs-performance data for the naive Bayes resampling result,
# covering fpr, tpr, credit.costs and mmce.
Nr <- generateThreshVsPerfData(
  NBr,
  measures = list(fpr, tpr, credit.costs, mmce)
)
# Performance curves with `th` marked, then the ROC curve.
plotThreshVsPerf(Nr, mark.th = th)
plotROCCurves(Nr)
# credit.costs of the stored predictions (no setThreshold() applied here).
performance(NBr$pred, credit.costs)
## credit.costs
## 0.843
# Binomial regression: probability-predicting learner, resampled with
# stratified 5-fold cross-validation on credit.task and scored by
# credit.costs and mmce.
Blrn <- makeLearner(
  "classif.binomial",
  predict.type = "prob",
  fix.factors.prediction = TRUE
)
# `rin` is overwritten with a newly created CV instance.
rin <- makeResampleInstance(
  "CV",
  iters = 5,
  task = credit.task,
  stratify = TRUE
)
# NOTE(review): the recorded run below warns about rank-deficient fits and
# glm non-convergence; presumably the indicator columns are collinear —
# confirm before relying on these estimates.
Br <- resample(
  Blrn,
  credit.task,
  rin,
  measures = list(credit.costs, mmce),
  show.info = FALSE
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
# Show the resampling summary.
Br
## Resample Result
## Task: ml_credit_dataset
## Learner: classif.binomial
## Aggr perf: credit.costs.test.mean=0.8670000,mmce.test.mean=0.2590000
## Runtime: 1.05699
# Apply the threshold `th` (defined outside this section) to the binomial
# model's resampled predictions and tabulate the resulting errors.
Bpred.th <- setThreshold(Br$pred, threshold = th)
calculateConfusionMatrix(Bpred.th)
## predicted
## true Bad Good -err.-
## Bad 248 52 52
## Good 320 380 320
## -err.- 320 52 372
# credit.costs and mmce of the re-thresholded predictions.
performance(
  Bpred.th,
  measures = list(credit.costs, mmce)
)
## credit.costs mmce
## 0.580 0.372
# Threshold-vs-performance data for the binomial resampling result,
# covering fpr, tpr, credit.costs and mmce.
Bir <- generateThreshVsPerfData(
  Br,
  measures = list(fpr, tpr, credit.costs, mmce)
)
# Performance curves with `th` marked, then the ROC curve.
plotThreshVsPerf(Bir, mark.th = th)
plotROCCurves(Bir)
# credit.costs of the stored predictions (no setThreshold() applied here).
performance(Br$pred, credit.costs)
## credit.costs
## 0.867
# Neural network (nnet): probability-predicting learner, resampled with
# stratified 5-fold cross-validation on credit.task and scored by
# credit.costs and mmce.
#
# trace = FALSE is forwarded to nnet to silence its per-iteration optimiser
# log, which otherwise prints a block of "iter ... value ..." lines for
# every CV fold and buries the results. It does not affect the fitted models.
#
# NOTE(review): in the previously recorded run, four of the five folds ended
# with "stopped after 100 iterations", i.e. they hit the iteration cap
# instead of converging (no maxit is set here). Raising maxit may be
# worthwhile, but it would change the reported numbers — confirm first.
NNetlrn <- makeLearner(
  "classif.nnet",
  predict.type = "prob",
  fix.factors.prediction = TRUE,
  trace = FALSE
)
# `rin` is overwritten with a newly created CV instance.
rin <- makeResampleInstance(
  "CV",
  iters = 5,
  task = credit.task,
  stratify = TRUE
)
NNetr <- resample(
  NNetlrn,
  credit.task,
  rin,
  measures = list(credit.costs, mmce),
  show.info = FALSE
)
# Show the resampling summary.
NNetr
## Resample Result
## Task: ml_credit_dataset
## Learner: classif.nnet
## Aggr perf: credit.costs.test.mean=0.9250000,mmce.test.mean=0.2970000
## Runtime: 1.09525
# Apply the threshold `th` (defined outside this section) to the neural
# network's resampled predictions and tabulate the resulting errors.
NNpred.th <- setThreshold(NNetr$pred, threshold = th)
calculateConfusionMatrix(NNpred.th)
## predicted
## true Bad Good -err.-
## Bad 189 111 111
## Good 228 472 228
## -err.- 228 111 339
# credit.costs and mmce of the re-thresholded predictions.
performance(
  NNpred.th,
  measures = list(credit.costs, mmce)
)
## credit.costs mmce
## 0.783 0.339
# Threshold-vs-performance data for the neural network resampling result,
# covering fpr, tpr, credit.costs and mmce.
NNr <- generateThreshVsPerfData(
  NNetr,
  measures = list(fpr, tpr, credit.costs, mmce)
)
# Performance curves with `th` marked, then the ROC curve.
plotThreshVsPerf(NNr, mark.th = th)
plotROCCurves(NNr)
# credit.costs of the stored predictions (no setThreshold() applied here).
performance(NNetr$pred, credit.costs)
## credit.costs
## 0.925