HW # 6 - Random Forest with credit dataset

# Load the credit dataset from the course web site.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, and randomForest() only fits a
# classification forest when the response (`default`) is a factor.
credit <- read.csv(
  "http://www.sci.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml10/credit.csv",
  stringsAsFactors = TRUE
)

# random forest with default settings
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.3.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Fit a forest of `default` on every other column, leaving all
# randomForest() tuning arguments at their defaults. The RNG is seeded
# first so the fitted model is repeatable from run to run.
set.seed(300)
rf <- randomForest(default ~ ., data = credit)
# Show the fitted model summary.
print(rf)
## 
## Call:
##  randomForest(formula = default ~ ., data = credit) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 4
## 
##         OOB estimate of  error rate: 23.8%
## Confusion matrix:
##      no yes class.error
## no  640  60  0.08571429
## yes 178 122  0.59333333

Code that I left out:

library(caret)
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10)

auto-tune a random forest

grid_rf <- expand.grid(.mtry = c(2, 4, 8, 16))

set.seed(300)
m_rf <- train(default ~ ., data = credit, method = "rf", metric = "Kappa", trControl = ctrl, tuneGrid = grid_rf)
m_rf

auto-tune a boosted C5.0 decision tree

grid_c50 <- expand.grid(.model = "tree", .trials = c(10, 20, 30, 40), .winnow = "FALSE")

set.seed(300)
m_c50 <- train(default ~ ., data = credit, method = "C5.0", metric = "Kappa", trControl = ctrl, tuneGrid = grid_c50)
m_c50

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).