## 'data.frame': 1000 obs. of 3 variables:
## $ cls: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ x1 : num 0.2008 0.0166 0.2287 0.1264 0.6008 ...
## $ x2 : num 0.678 1.5766 -0.5595 -0.0938 -0.2984 ...
##
## 0 1
## 980 20
# Baseline: fit a classification tree on the original (imbalanced) training
# data and score it on the held-out test set.
library(rpart)
treeimb <- rpart(formula = cls ~ ., data = hacide.train)
pred.treeimb <- predict(treeimb, newdata = hacide.test)
# The second column of the prediction matrix is passed as the score for the
# positive class (presumably the probability of level "1").
accuracy.meas(
  response = hacide.test$cls,
  predicted = pred.treeimb[, 2]
)
##
## Call:
## accuracy.meas(response = hacide.test$cls, predicted = pred.treeimb[,
## 2])
##
## Examples are labelled as positive when predicted is greater than 0.5
##
## precision: 1.000
## recall: 0.200
## F: 0.167
## Area under the curve (AUC): 0.600
# Oversample the minority class until the data set holds N = 1960 rows
# (980 per class). The resampling is random, so a fixed seed is supplied to
# make the result reproducible, matching the under- and both-sampling calls.
data_balanced_over <- ovun.sample(
  cls ~ .,
  data = hacide.train,
  method = "over",
  N = 1960,
  seed = 1
)$data
table(data_balanced_over$cls)
##
## 0 1
## 980 980
# Undersample the majority class so that only N = 40 rows remain in total.
data_balanced_under <- ovun.sample(
  formula = cls ~ .,
  data = hacide.train,
  method = "under",
  N = 40,
  seed = 1
)$data
# Class counts after undersampling
table(data_balanced_under$cls)
##
## 0 1
## 20 20
# Combine over- and undersampling: draw N = 1000 rows, with p = 0.5 as the
# target probability of the minority class.
data_balanced_both <- ovun.sample(
  formula = cls ~ .,
  data = hacide.train,
  method = "both",
  p = 0.5,
  N = 1000,
  seed = 1
)$data
# Class counts after combined resampling
table(data_balanced_both$cls)
##
## 0 1
## 520 480
# Generate a synthetic, roughly balanced training set with ROSE. Without this
# step `data.rose` is undefined and the first rpart() call below fails.
data.rose <- ROSE(cls ~ ., data = hacide.train, seed = 1)$data
table(data.rose$cls)
##
## 0 1
## 520 480
# Build one decision tree per balanced training set
tree.rose <- rpart(cls ~ ., data = data.rose)
tree.over <- rpart(cls ~ ., data = data_balanced_over)
tree.under <- rpart(cls ~ ., data = data_balanced_under)
tree.both <- rpart(cls ~ ., data = data_balanced_both)
# Make predictions on unseen data: one prediction matrix per fitted tree
pred.tree.rose <- predict(tree.rose, newdata = hacide.test)
pred.tree.over <- predict(tree.over, newdata = hacide.test)
pred.tree.under <- predict(tree.under, newdata = hacide.test)
pred.tree.both <- predict(tree.both, newdata = hacide.test)
# Evaluate every model on the test set. The predictions were previously
# computed but never used; roc.curve() reports the AUC for each resampling
# strategy, scoring with the second column of the prediction matrix.
roc.curve(hacide.test$cls, pred.tree.rose[, 2])
roc.curve(hacide.test$cls, pred.tree.over[, 2])
roc.curve(hacide.test$cls, pred.tree.under[, 2])
roc.curve(hacide.test$cls, pred.tree.both[, 2])
# Holdout assessment of an rpart learner trained on ROSE-generated data.
# `extr.pred` pulls the second column out of the learner's prediction matrix.
ROSE.holdout <- ROSE.eval(
  formula = cls ~ .,
  data = hacide.train,
  learner = rpart,
  extr.pred = function(obj) obj[, 2],
  method.assess = "holdout",
  seed = 1
)
ROSE.holdout
##
## Call:
## ROSE.eval(formula = cls ~ ., data = hacide.train, learner = rpart,
## extr.pred = function(obj) obj[, 2], method.assess = "holdout",
## seed = 1)
##
## Holdout estimate of auc: 0.980
## Area under the curve (AUC): 0.993
## Area under the curve (AUC): 0.798
## Area under the curve (AUC): 0.924
## Area under the curve (AUC): 0.798
## T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class
## 1 105 7.3 1.5 1.5 -0.1 negative
## 2 67 23.3 7.4 1.8 -0.6 positive
## 3 111 8.4 1.5 0.8 1.2 negative
## 4 89 14.3 4.1 0.5 0.2 positive
## 5 105 9.5 1.8 1.6 3.6 negative
## 6 110 20.3 3.7 0.6 0.2 positive
## T3resin Thyroxin Triiodothyronine Thyroidstimulating
## Min. : 65.0 Min. : 0.500 Min. : 0.20 Min. : 0.10
## 1st Qu.:103.0 1st Qu.: 7.100 1st Qu.: 1.35 1st Qu.: 1.00
## Median :110.0 Median : 9.200 Median : 1.70 Median : 1.30
## Mean :109.6 Mean : 9.805 Mean : 2.05 Mean : 2.88
## 3rd Qu.:117.5 3rd Qu.:11.300 3rd Qu.: 2.20 3rd Qu.: 1.70
## Max. :144.0 Max. :25.300 Max. :10.00 Max. :56.40
## TSH_value Class
## Min. :-0.700 negative:180
## 1st Qu.: 0.550 positive: 35
## Median : 2.000
## Mean : 4.199
## 3rd Qu.: 4.100
## Max. :56.300
# Count the instances of each class (NA labels are ignored, as which() did).
numPositive <- sum(newthyroid1$Class == "positive", na.rm = TRUE)
numNegative <- sum(newthyroid1$Class == "negative", na.rm = TRUE)
# Number of synthetic minority instances needed to equalise the two classes.
nInstances <- numNegative - numPositive
# Generate exactly that many new minority examples with PDFOS. The previous
# hard-coded value of 80 ignored nInstances and left the classes unbalanced.
newSamples <- pdfos(
  dataset = newthyroid1,
  numInstances = nInstances,
  classAttr = "Class"
)
# Bind a balanced dataset
newDataset <- rbind(newthyroid1, newSamples)
head(newthyroid1)
## T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class
## 1 105 7.3 1.5 1.5 -0.1 negative
## 2 67 23.3 7.4 1.8 -0.6 positive
## 3 111 8.4 1.5 0.8 1.2 negative
## 4 89 14.3 4.1 0.5 0.2 positive
## 5 105 9.5 1.8 1.6 3.6 negative
## 6 110 20.3 3.7 0.6 0.2 positive
## T3resin Thyroxin Triiodothyronine Thyroidstimulating
## Min. : 65.0 Min. : 0.500 Min. : 0.20 Min. : 0.10
## 1st Qu.:103.0 1st Qu.: 7.100 1st Qu.: 1.35 1st Qu.: 1.00
## Median :110.0 Median : 9.200 Median : 1.70 Median : 1.30
## Mean :109.6 Mean : 9.805 Mean : 2.05 Mean : 2.88
## 3rd Qu.:117.5 3rd Qu.:11.300 3rd Qu.: 2.20 3rd Qu.: 1.70
## Max. :144.0 Max. :25.300 Max. :10.00 Max. :56.40
## TSH_value Class
## Min. :-0.700 negative:180
## 1st Qu.: 0.550 positive: 35
## Median : 2.000
## Mean : 4.199
## 3rd Qu.: 4.100
## Max. :56.300