library(rpart)
library(rattle)
## Loading required package: RGtk2
## Rattle: A free graphical interface for data mining with R.
## Version 3.5.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(RColorBrewer)
library(crossval)
library(gplots)
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
library(vcd)
## Loading required package: grid
library(Metrics)
Read the Caravan.csv file
# Load the Caravan data. The file is chosen interactively so the script does
# not depend on an absolute path that exists on only one machine. (An earlier
# hard-coded read was dropped: its result was overwritten by this one.)
d1 <- read.csv(file.choose(), header = TRUE)

# Keep an untouched copy of the raw data.
d1.ori <- d1

# Reproducible 50/50 split: draw half of the row names for training (tr);
# every row that was not drawn goes to the test set (te).
set.seed(99)
train_rows <- sample(row.names(d1.ori), size = round(nrow(d1.ori) * 0.5))
tr <- d1.ori[train_rows, ]
te <- d1.ori[!(row.names(d1.ori) %in% train_rows), ]
Reset the original training and test data - just to be sure
# Work on copies so that tr and te themselves stay untouched.
te2 <- te
te1 <- te
tr1 <- tr

# Turn the response into a factor before fitting; the recorded printcp output
# reports the resulting fit as a classification tree.
tr1$Purchase <- as.factor(tr1$Purchase)

# Grow the tree with Purchase as the response and every other column as a
# candidate predictor.
fit1 <- rpart(
  formula = Purchase ~ .,
  data = tr1,
  control = rpart.control(minsplit = 20, minbucket = 1, cp = 0.008)
)
I am not getting any model! So fit1 is an empty model, and fancyRpartPlot(fit1) gives an error (naturally).
# Auto-print the fitted tree; the node listing it produced is recorded below.
# NOTE(review): that listing shows several splits under the root, so in this
# run the fit is not a root-only (empty) tree.
fit1
## n= 2911
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2911 181 0 (0.93782205 0.06217795)
## 2) PPERSAUT< 5.5 1755 53 0 (0.96980057 0.03019943) *
## 3) PPERSAUT>=5.5 1156 128 0 (0.88927336 0.11072664)
## 6) MOSTYPE>=12.5 785 56 0 (0.92866242 0.07133758) *
## 7) MOSTYPE< 12.5 371 72 0 (0.80592992 0.19407008)
## 14) PBRAND< 3.5 213 24 0 (0.88732394 0.11267606)
## 28) MBERHOOG< 5.5 181 15 0 (0.91712707 0.08287293) *
## 29) MBERHOOG>=5.5 32 9 0 (0.71875000 0.28125000)
## 58) MBERMIDD< 1.5 23 3 0 (0.86956522 0.13043478) *
## 59) MBERMIDD>=1.5 9 3 1 (0.33333333 0.66666667) *
## 15) PBRAND>=3.5 158 48 0 (0.69620253 0.30379747)
## 30) MBERMIDD< 6.5 142 37 0 (0.73943662 0.26056338) *
## 31) MBERMIDD>=6.5 16 5 1 (0.31250000 0.68750000) *
# Run the garbage collector; the memory-usage table it returned is recorded
# below.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 593178 31.7 1168576 62.5 750400 40.1
## Vcells 1298370 10.0 2657200 20.3 2657059 20.3
# Draw the fitted tree with fancyRpartPlot().
fancyRpartPlot(fit1)
# Print the complexity-parameter (CP) table of the fit, together with the
# variables used and the root node error (output recorded below).
printcp(fit1)
##
## Classification tree:
## rpart(formula = Purchase ~ ., data = tr1, control = rpart.control(minsplit = 20,
## minbucket = 1, cp = 0.008))
##
## Variables actually used in tree construction:
## [1] MBERHOOG MBERMIDD MOSTYPE PBRAND PPERSAUT
##
## Root node error: 181/2911 = 0.062178
##
## n= 2911
##
## CP nsplit rel error xerror xstd
## 1 0.0082873 0 1.00000 1.000 0.071982
## 2 0.0080000 6 0.95028 1.105 0.075402
# Print the fitted tree explicitly; the recorded listing below matches the one
# produced by the earlier bare `fit1`.
print(fit1)
## n= 2911
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2911 181 0 (0.93782205 0.06217795)
## 2) PPERSAUT< 5.5 1755 53 0 (0.96980057 0.03019943) *
## 3) PPERSAUT>=5.5 1156 128 0 (0.88927336 0.11072664)
## 6) MOSTYPE>=12.5 785 56 0 (0.92866242 0.07133758) *
## 7) MOSTYPE< 12.5 371 72 0 (0.80592992 0.19407008)
## 14) PBRAND< 3.5 213 24 0 (0.88732394 0.11267606)
## 28) MBERHOOG< 5.5 181 15 0 (0.91712707 0.08287293) *
## 29) MBERHOOG>=5.5 32 9 0 (0.71875000 0.28125000)
## 58) MBERMIDD< 1.5 23 3 0 (0.86956522 0.13043478) *
## 59) MBERMIDD>=1.5 9 3 1 (0.33333333 0.66666667) *
## 15) PBRAND>=3.5 158 48 0 (0.69620253 0.30379747)
## 30) MBERMIDD< 6.5 142 37 0 (0.73943662 0.26056338) *
## 31) MBERMIDD>=6.5 16 5 1 (0.31250000 0.68750000) *
# Draw the tree on the current graphics device.
# NOTE(review): no text(fit1) call is visible after this, so the plot may be
# missing its split labels - confirm.
plot(fit1)

# Report the CP value of the cptable row with the smallest xerror.
cp_table <- fit1$cptable
best_row <- which.min(cp_table[, "xerror"])
cp_table[best_row, "CP"]
## [1] 0.008287293