# VIDEO 4
# Read in the data
# NOTE(review): the working directory below is specific to the original
# author's machine; adjust it (or run from the folder holding stevens.csv).
setwd('C:/Users/daria.alekseeva/Documents/Edx/Trees/')
# stringsAsFactors = TRUE keeps the text columns as factors, matching the
# str() output recorded below. Since R 4.0.0 read.csv() no longer converts
# strings to factors by default.
stevens <- read.csv("stevens.csv", stringsAsFactors = TRUE)
str(stevens)
## 'data.frame': 566 obs. of 9 variables:
## $ Docket : Factor w/ 566 levels "00-1011","00-1045",..: 63 69 70 145 97 181 242 289 334 436 ...
## $ Term : int 1994 1994 1994 1994 1995 1995 1996 1997 1997 1999 ...
## $ Circuit : Factor w/ 13 levels "10th","11th",..: 4 11 7 3 9 11 13 11 12 2 ...
## $ Issue : Factor w/ 11 levels "Attorneys","CivilRights",..: 5 5 5 5 9 5 5 5 5 3 ...
## $ Petitioner: Factor w/ 12 levels "AMERICAN.INDIAN",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Respondent: Factor w/ 12 levels "AMERICAN.INDIAN",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ LowerCourt: Factor w/ 2 levels "conser","liberal": 2 2 2 1 1 1 1 1 1 1 ...
## $ Unconst : int 0 0 0 0 0 1 0 1 0 0 ...
## $ Reverse : int 1 1 1 1 1 0 1 1 1 1 ...
# Split the data
# sample.split() is given the outcome column and a split ratio of 0.7; the
# seed makes the random assignment repeatable.
library(caTools)
set.seed(200)
spl <- sample.split(stevens$Reverse, SplitRatio = 0.7)
# Rows flagged TRUE go to the training set, the remaining rows to the test set
Train <- stevens[spl, ]
Test <- stevens[!spl, ]
# Install rpart library
#install.packages("rpart")
library(rpart)
#install.packages("rpart.plot")
library(rpart.plot)
# CART model
# Classification tree (method = "class") for Reverse on the six case
# descriptors, fitted on the training set with minbucket = 25.
StevensTree <- rpart(
  Reverse ~ Circuit + Issue + Petitioner + Respondent + LowerCourt + Unconst,
  data = Train,
  method = "class",
  minbucket = 25
)
# Draw the fitted tree
prp(StevensTree)

# Make predictions
# type = "class" returns the predicted label for every test case
PredictCART <- predict(StevensTree, newdata = Test, type = "class")
# Confusion matrix: rows are the actual outcome, columns the predicted one
table(Test$Reverse, PredictCART)
## PredictCART
## 0 1
## 0 35 42
## 1 26 67
# Accuracy = share of test cases whose predicted class equals the actual one.
# It is computed from the predictions themselves instead of hand-typed counts;
# the previous constants did not match the confusion matrix above.
# as.character() makes the comparison work whether Reverse is stored as an
# integer or as a factor.
mean(as.character(PredictCART) == as.character(Test$Reverse))
## [1] 0.6
# ROC curve
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
# Class probabilities for the test set: one row per test case, with one
# column for outcome 0 and one for outcome 1 (see the printout below)
PredictROC <- predict(StevensTree, newdata = Test, type = "prob")
PredictROC
## 0 1
## 4 0.1444444 0.8555556
## 5 0.1444444 0.8555556
## 6 0.1444444 0.8555556
## 7 0.1444444 0.8555556
## 8 0.1444444 0.8555556
## 14 0.3888889 0.6111111
## 21 0.3888889 0.6111111
## 28 0.3888889 0.6111111
## 34 0.1444444 0.8555556
## 35 0.1444444 0.8555556
## 36 0.6603774 0.3396226
## 42 0.3888889 0.6111111
## 44 0.1444444 0.8555556
## 55 0.3888889 0.6111111
## 58 0.3888889 0.6111111
## 60 0.1444444 0.8555556
## 66 0.1444444 0.8555556
## 67 0.1444444 0.8555556
## 68 0.1444444 0.8555556
## 70 0.6603774 0.3396226
## 71 0.3888889 0.6111111
## 72 0.3888889 0.6111111
## 77 0.1444444 0.8555556
## 78 0.6603774 0.3396226
## 79 0.3888889 0.6111111
## 87 0.6486486 0.3513514
## 95 0.8260870 0.1739130
## 101 0.8260870 0.1739130
## 102 0.8260870 0.1739130
## 107 0.8260870 0.1739130
## 112 0.8260870 0.1739130
## 116 0.8260870 0.1739130
## 120 0.8260870 0.1739130
## 127 0.8260870 0.1739130
## 133 0.8260870 0.1739130
## 138 0.8260870 0.1739130
## 139 0.8260870 0.1739130
## 140 0.8260870 0.1739130
## 141 0.8260870 0.1739130
## 153 0.3888889 0.6111111
## 154 0.6603774 0.3396226
## 161 0.6603774 0.3396226
## 165 0.3888889 0.6111111
## 166 0.3888889 0.6111111
## 169 0.3888889 0.6111111
## 174 0.6603774 0.3396226
## 175 0.6603774 0.3396226
## 179 0.1444444 0.8555556
## 181 0.1444444 0.8555556
## 183 0.1444444 0.8555556
## 190 0.1444444 0.8555556
## 196 0.1444444 0.8555556
## 197 0.3888889 0.6111111
## 198 0.6603774 0.3396226
## 204 0.3888889 0.6111111
## 211 0.8260870 0.1739130
## 213 0.3066667 0.6933333
## 219 0.8260870 0.1739130
## 221 0.8260870 0.1739130
## 225 0.3888889 0.6111111
## 227 0.3066667 0.6933333
## 231 0.3066667 0.6933333
## 235 0.3888889 0.6111111
## 241 0.3066667 0.6933333
## 242 0.3066667 0.6933333
## 243 0.3066667 0.6933333
## 244 0.3066667 0.6933333
## 245 0.3066667 0.6933333
## 246 0.6486486 0.3513514
## 247 0.3066667 0.6933333
## 251 0.6603774 0.3396226
## 260 0.6603774 0.3396226
## 266 0.6603774 0.3396226
## 268 0.3888889 0.6111111
## 270 0.6603774 0.3396226
## 274 0.6603774 0.3396226
## 282 0.3066667 0.6933333
## 285 0.3888889 0.6111111
## 290 0.3066667 0.6933333
## 292 0.3888889 0.6111111
## 297 0.3066667 0.6933333
## 298 0.6603774 0.3396226
## 300 0.3888889 0.6111111
## 301 0.3888889 0.6111111
## 303 0.3888889 0.6111111
## 307 0.3066667 0.6933333
## 310 0.3066667 0.6933333
## 311 0.6603774 0.3396226
## 312 0.3888889 0.6111111
## 315 0.6603774 0.3396226
## 320 0.3888889 0.6111111
## 322 0.6603774 0.3396226
## 325 0.6486486 0.3513514
## 328 0.6603774 0.3396226
## 329 0.3888889 0.6111111
## 332 0.3066667 0.6933333
## 337 0.3888889 0.6111111
## 338 0.6486486 0.3513514
## 345 0.3888889 0.6111111
## 348 0.3066667 0.6933333
## 355 0.6603774 0.3396226
## 357 0.3066667 0.6933333
## 359 0.3888889 0.6111111
## 360 0.6486486 0.3513514
## 366 0.3888889 0.6111111
## 371 0.3066667 0.6933333
## 373 0.3888889 0.6111111
## 376 0.3066667 0.6933333
## 378 0.3888889 0.6111111
## 380 0.3066667 0.6933333
## 383 0.3888889 0.6111111
## 387 0.3066667 0.6933333
## 393 0.3066667 0.6933333
## 403 0.6603774 0.3396226
## 405 0.6603774 0.3396226
## 407 0.6603774 0.3396226
## 410 0.3888889 0.6111111
## 412 0.6603774 0.3396226
## 414 0.3066667 0.6933333
## 424 0.3888889 0.6111111
## 430 0.1444444 0.8555556
## 434 0.8260870 0.1739130
## 439 0.1444444 0.8555556
## 448 0.1444444 0.8555556
## 453 0.1444444 0.8555556
## 456 0.1444444 0.8555556
## 457 0.1444444 0.8555556
## 459 0.1444444 0.8555556
## 461 0.1444444 0.8555556
## 462 0.1444444 0.8555556
## 463 0.1444444 0.8555556
## 464 0.1444444 0.8555556
## 465 0.1444444 0.8555556
## 466 0.1444444 0.8555556
## 467 0.1444444 0.8555556
## 472 0.1444444 0.8555556
## 473 0.1444444 0.8555556
## 475 0.1444444 0.8555556
## 478 0.1444444 0.8555556
## 480 0.1444444 0.8555556
## 481 0.1444444 0.8555556
## 488 0.1444444 0.8555556
## 490 0.1444444 0.8555556
## 493 0.8260870 0.1739130
## 495 0.1444444 0.8555556
## 503 0.3066667 0.6933333
## 505 0.6486486 0.3513514
## 508 0.6486486 0.3513514
## 510 0.6486486 0.3513514
## 511 0.3066667 0.6933333
## 512 0.6486486 0.3513514
## 516 0.3066667 0.6933333
## 517 0.3066667 0.6933333
## 521 0.6486486 0.3513514
## 524 0.6486486 0.3513514
## 525 0.6486486 0.3513514
## 529 0.3066667 0.6933333
## 533 0.3066667 0.6933333
## 536 0.3066667 0.6933333
## 540 0.6486486 0.3513514
## 541 0.6486486 0.3513514
## 542 0.6486486 0.3513514
## 546 0.8260870 0.1739130
## 548 0.3066667 0.6933333
## 551 0.8260870 0.1739130
## 554 0.6486486 0.3513514
## 557 0.6486486 0.3513514
## 558 0.3066667 0.6933333
## 561 0.3066667 0.6933333
## 563 0.8260870 0.1739130
# Pair the second probability column (outcome 1) with the actual test labels
pred <- prediction(PredictROC[, 2], Test$Reverse)
# ROC curve: true positive rate plotted against false positive rate
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)

# Compute the AUC of the CART model from the previous video, using the following command in your R console:
as.numeric(performance(pred, measure = "auc")@y.values)
## [1] 0.6856584
# CART model with bucket size 5
# Each refit below overwrites StevensTree; only minbucket changes.
StevensTree <- rpart(
  Reverse ~ Circuit + Issue + Petitioner + Respondent + LowerCourt + Unconst,
  data = Train,
  method = "class",
  minbucket = 5
)
prp(StevensTree)

# CART model with bucket size 100
StevensTree <- rpart(
  Reverse ~ Circuit + Issue + Petitioner + Respondent + LowerCourt + Unconst,
  data = Train,
  method = "class",
  minbucket = 100
)
prp(StevensTree)
# VIDEO 5 - Random Forests
# Install randomForest package
#install.packages("randomForest")
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
# Build random forest model
# First attempt: Reverse is still an integer here, which triggers the
# regression warning recorded below.
StevensForest <- randomForest(
  Reverse ~ Circuit + Issue + Petitioner + Respondent + LowerCourt + Unconst,
  data = Train,
  ntree = 200,
  nodesize = 25
)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values. Are you sure you want to do regression?
# Convert outcome to factor
Train$Reverse <- as.factor(Train$Reverse)
Test$Reverse <- as.factor(Test$Reverse)
# Try again
StevensForest <- randomForest(
  Reverse ~ Circuit + Issue + Petitioner + Respondent + LowerCourt + Unconst,
  data = Train,
  ntree = 200,
  nodesize = 25
)
# Make predictions
PredictForest <- predict(StevensForest, newdata = Test)
# Confusion matrix: rows are the actual outcome, columns the predicted one
table(Test$Reverse, PredictForest)
## PredictForest
## 0 1
## 0 43 34
## 1 22 71
# Accuracy = share of test cases predicted correctly. It is computed from the
# predictions instead of hand-typed counts; the previous constants did not
# match the confusion matrix above.
mean(as.character(PredictForest) == as.character(Test$Reverse))
## [1] 0.6705882
# VIDEO 6
# Install cross-validation packages
#install.packages("caret")
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
#install.packages("e1071")
library(e1071)
# Define cross-validation experiment
# 10-fold cross-validation over candidate cp values 0.01, 0.02, ..., 0.50
numFolds <- trainControl(method = "cv", number = 10)
cpGrid <- expand.grid(.cp = seq(from = 0.01, to = 0.5, by = 0.01))
# Perform the cross validation
# The result is not stored; it is printed, as recorded below.
train(
  Reverse ~ Circuit + Issue + Petitioner + Respondent + LowerCourt + Unconst,
  data = Train,
  method = "rpart",
  trControl = numFolds,
  tuneGrid = cpGrid
)
## CART
##
## 396 samples
## 8 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 357, 357, 356, 356, 357, 357, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa Accuracy SD Kappa SD
## 0.01 0.6414103 0.26524035 0.053154217 0.10809392
## 0.02 0.6339744 0.25549154 0.032310914 0.06169834
## 0.03 0.6389744 0.26992125 0.040226367 0.08084489
## 0.04 0.6441026 0.28267457 0.049889204 0.10360430
## 0.05 0.6616026 0.32203736 0.054799632 0.11075452
## 0.06 0.6616026 0.32203736 0.054799632 0.11075452
## 0.07 0.6616026 0.32203736 0.054799632 0.11075452
## 0.08 0.6616026 0.32203736 0.054799632 0.11075452
## 0.09 0.6616026 0.32203736 0.054799632 0.11075452
## 0.10 0.6616026 0.32203736 0.054799632 0.11075452
## 0.11 0.6616026 0.32203736 0.054799632 0.11075452
## 0.12 0.6616026 0.32203736 0.054799632 0.11075452
## 0.13 0.6616026 0.32203736 0.054799632 0.11075452
## 0.14 0.6616026 0.32203736 0.054799632 0.11075452
## 0.15 0.6616026 0.32203736 0.054799632 0.11075452
## 0.16 0.6616026 0.32203736 0.054799632 0.11075452
## 0.17 0.6616026 0.32203736 0.054799632 0.11075452
## 0.18 0.6616026 0.32203736 0.054799632 0.11075452
## 0.19 0.6616026 0.32203736 0.054799632 0.11075452
## 0.20 0.6616026 0.32203736 0.054799632 0.11075452
## 0.21 0.6616026 0.32203736 0.054799632 0.11075452
## 0.22 0.6616026 0.32203736 0.054799632 0.11075452
## 0.23 0.6616026 0.32203736 0.054799632 0.11075452
## 0.24 0.6410897 0.27321847 0.058935986 0.13444037
## 0.25 0.5935897 0.14790079 0.046677154 0.13564530
## 0.26 0.5605128 0.05759208 0.025546836 0.09326580
## 0.27 0.5605128 0.05759208 0.025546836 0.09326580
## 0.28 0.5453846 0.00000000 0.005958436 0.00000000
## 0.29 0.5453846 0.00000000 0.005958436 0.00000000
## 0.30 0.5453846 0.00000000 0.005958436 0.00000000
## 0.31 0.5453846 0.00000000 0.005958436 0.00000000
## 0.32 0.5453846 0.00000000 0.005958436 0.00000000
## 0.33 0.5453846 0.00000000 0.005958436 0.00000000
## 0.34 0.5453846 0.00000000 0.005958436 0.00000000
## 0.35 0.5453846 0.00000000 0.005958436 0.00000000
## 0.36 0.5453846 0.00000000 0.005958436 0.00000000
## 0.37 0.5453846 0.00000000 0.005958436 0.00000000
## 0.38 0.5453846 0.00000000 0.005958436 0.00000000
## 0.39 0.5453846 0.00000000 0.005958436 0.00000000
## 0.40 0.5453846 0.00000000 0.005958436 0.00000000
## 0.41 0.5453846 0.00000000 0.005958436 0.00000000
## 0.42 0.5453846 0.00000000 0.005958436 0.00000000
## 0.43 0.5453846 0.00000000 0.005958436 0.00000000
## 0.44 0.5453846 0.00000000 0.005958436 0.00000000
## 0.45 0.5453846 0.00000000 0.005958436 0.00000000
## 0.46 0.5453846 0.00000000 0.005958436 0.00000000
## 0.47 0.5453846 0.00000000 0.005958436 0.00000000
## 0.48 0.5453846 0.00000000 0.005958436 0.00000000
## 0.49 0.5453846 0.00000000 0.005958436 0.00000000
## 0.50 0.5453846 0.00000000 0.005958436 0.00000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.23.
# Create a new CART model
# cp = 0.23 is the value reported by the cross-validation output recorded
# above ("The final value used for the model was cp = 0.23"). It replaces a
# hard-coded 0.18 that disagreed with that output.
# NOTE(review): the table recorded below predates this change; re-run to
# confirm it is unchanged.
StevensTreeCV <- rpart(
  Reverse ~ Circuit + Issue + Petitioner + Respondent + LowerCourt + Unconst,
  data = Train,
  method = "class",
  cp = 0.23
)
# Make predictions
PredictCV <- predict(StevensTreeCV, newdata = Test, type = "class")
# Confusion matrix: rows are the actual outcome, columns the predicted one
table(Test$Reverse, PredictCV)
## PredictCV
## 0 1
## 0 51 26
## 1 28 65
# Accuracy = share of test cases predicted correctly. It is computed from the
# predictions instead of hand-typed counts; the previous constants did not
# match the confusion matrix above.
mean(as.character(PredictCV) == as.character(Test$Reverse))
## [1] 0.6823529
prp(StevensTreeCV)
