HW #6 - Random Forest with the credit dataset
# Load the credit dataset.
# stringsAsFactors = TRUE is passed explicitly: from R 4.0.0 onward
# read.csv() leaves string columns as character by default, while
# randomForest() only fits a classification forest when the response
# (`default`, with values "no"/"yes") is a factor.
credit <- read.csv(
  "http://www.sci.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml10/credit.csv",
  stringsAsFactors = TRUE
)
# random forest with default settings
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.3.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Seed the random number generator so the fitted forest is reproducible.
set.seed(300)

# Model `default` from every other column of `credit`, leaving all
# randomForest() tuning arguments at their defaults.
rf <- randomForest(default ~ ., data = credit)

# Display the fitted model (call, OOB error estimate, confusion matrix).
print(rf)
##
## Call:
## randomForest(formula = default ~ ., data = credit)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 23.8%
## Confusion matrix:
## no yes class.error
## no 640 60 0.08571429
## yes 178 122 0.59333333
Code that I left out:
# Tuning comparison with caret: random forest vs. C5.0.
# Both models share the same resampling scheme and the same seed, and both
# are selected on Kappa.
library(caret)

# 10-fold cross-validation repeated 10 times.
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10)

# Candidate values of mtry to evaluate for the random forest.
grid_rf <- expand.grid(.mtry = c(2, 4, 8, 16))

# Tune the random forest over grid_rf and print the resampling results.
set.seed(300)
m_rf <- train(default ~ ., data = credit, method = "rf",
              metric = "Kappa", trControl = ctrl,
              tuneGrid = grid_rf)
m_rf

# Candidate C5.0 settings: tree models with 10, 20, 30 or 40 trials.
# NOTE(review): .winnow is given as the string "FALSE" rather than the
# logical FALSE; kept as written -- confirm the installed caret version
# accepts it.
grid_c50 <- expand.grid(.model = "tree",
                        .trials = c(10, 20, 30, 40),
                        .winnow = "FALSE")

# Tune C5.0 over grid_c50 with the same seed and resampling scheme, then
# print the resampling results.
set.seed(300)
m_c50 <- train(default ~ ., data = credit, method = "C5.0",
               metric = "Kappa", trControl = ctrl,
               tuneGrid = grid_c50)
m_c50
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).