This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary statistics of the built-in cars data set (speed, dist).
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Two settings shape a decision tree: how deep the tree may grow,
# and how many observations each leaf (bucket) must contain.
# Predict using decision trees.
# Toy training set: three claims with one predictor (RearEnd) and the
# outcome to be predicted (Fraud).
train <- data.frame(
  ClaimID = c(1, 2, 3),
  RearEnd = c(TRUE, FALSE, TRUE),
  Fraud = c(TRUE, FALSE, TRUE)
)
train
## ClaimID RearEnd Fraud
## 1 1 TRUE TRUE
## 2 2 FALSE FALSE
## 3 3 TRUE TRUE
# To grow our decision tree we need to install rpart.
library(rpart)# bring the rpart into your r environment
## Warning: package 'rpart' was built under R version 4.3.3
# Fit a classification tree with Fraud as the dependent variable and RearEnd
# as the only predictor; no control settings are overridden here.
mytree <- rpart(
  formula = Fraud ~ RearEnd,
  data = train,
  method = "class"
)
mytree
## n= 3
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 3 1 TRUE (0.3333333 0.6666667) *
# minsplit is the minimum number of observations that must exist in a node in order for a split to be attempted.
# minbucket is the minimum number of observations in any terminal node.
# Refit with relaxed stopping rules so the three-row data set can be split:
# a node needs only 2 observations before a split is attempted, and a
# terminal node may hold a single observation.
mytree <- rpart(
  formula = Fraud ~ RearEnd,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 3 1 TRUE (0.3333333 0.6666667)
## 2) RearEnd< 0.5 1 0 FALSE (1.0000000 0.0000000) *
## 3) RearEnd>=0.5 2 0 TRUE (0.0000000 1.0000000) *
library(rattle)
## Warning: package 'rattle' was built under R version 4.3.3
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.3
library(RColorBrewer)
# Plot mytree (caption = NULL overrides the plot's default caption).
fancyRpartPlot(mytree, caption=NULL)
# Reading the root split on RearEnd: "yes" means RearEnd is 0 (FALSE),
# which leads to the no-fraud leaf.
# "No" means RearEnd is 1 (TRUE), which leads to the fraud leaf.
# Same model and stopping rules, but ask rpart to pick splits with the
# "information" criterion instead of relying on its default.
mytree <- rpart(
  formula = Fraud ~ RearEnd,
  data = train,
  method = "class",
  parms = list(split = "information"),
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 3 1 TRUE (0.3333333 0.6666667)
## 2) RearEnd< 0.5 1 0 FALSE (1.0000000 0.0000000) *
## 3) RearEnd>=0.5 2 0 TRUE (0.0000000 1.0000000) *
# Plot the tree that was grown with the information splitting criterion.
fancyRpartPlot(mytree, caption = NULL)
# New toy data: claim 3 is still a rear-end collision but is no longer
# fraudulent, so RearEnd does not separate the classes perfectly.
train <- data.frame(
  ClaimID = c(1, 2, 3),
  RearEnd = c(TRUE, FALSE, TRUE),
  Fraud = c(TRUE, FALSE, FALSE)
)
train
## ClaimID RearEnd Fraud
## 1 1 TRUE TRUE
## 2 2 FALSE FALSE
## 3 3 TRUE FALSE
#--------------
# Fit on the imperfectly separable data. cp = -1 is passed so that the
# complexity threshold does not block the split on RearEnd.
mytree <- rpart(
  formula = Fraud ~ RearEnd,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1,
  cp = -1
)
mytree
## n= 3
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 3 1 FALSE (0.6666667 0.3333333)
## 2) RearEnd< 0.5 1 0 FALSE (1.0000000 0.0000000) *
## 3) RearEnd>=0.5 2 1 FALSE (0.5000000 0.5000000) *
# Plot the tree that was fitted with cp = -1.
fancyRpartPlot(mytree, caption = NULL)
# Seven claims with two candidate predictors (RearEnd, Whiplash) for Fraud.
train <- data.frame(
  ClaimID = 1:7,
  RearEnd = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE),
  Whiplash = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE),
  Fraud = c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE)
)
train
## ClaimID RearEnd Whiplash Fraud
## 1 1 TRUE TRUE TRUE
## 2 2 TRUE TRUE TRUE
## 3 3 FALSE TRUE TRUE
## 4 4 FALSE TRUE FALSE
## 5 5 FALSE TRUE FALSE
## 6 6 FALSE FALSE FALSE
## 7 7 FALSE FALSE FALSE
# Grow a tree limited to a single level (maxdepth = 1) so that only one of
# the two predictors can be used for the split, then plot it.
mytree <- rpart(
  formula = Fraud ~ RearEnd + Whiplash,
  data = train,
  method = "class",
  maxdepth = 1,
  minsplit = 2,
  minbucket = 1
)
fancyRpartPlot(mytree, caption = NULL)
# 2 x 2 misclassification-cost matrix, written row by row: zero cost on the
# diagonal, cost 1 in position [1, 2] and cost 3 in position [2, 1].
# NOTE(review): rpart's documentation describes rows as the actual class and
# columns as the predicted class - confirm that orientation matches intent.
lossmatrix <- matrix(
  c(
    0, 1,
    3, 0
  ),
  nrow = 2,
  byrow = TRUE
)
lossmatrix
## [,1] [,2]
## [1,] 0 1
## [2,] 3 0
# Refit the one-level tree, this time handing rpart the asymmetric loss
# matrix through parms so the two kinds of error are weighted differently,
# then plot the result.
mytree <- rpart(
  formula = Fraud ~ RearEnd + Whiplash,
  data = train,
  method = "class",
  maxdepth = 1,
  minsplit = 2,
  minbucket = 1,
  parms = list(loss = lossmatrix)
)
fancyRpartPlot(mytree, caption = NULL)
# Five claims with a four-level Activity factor. The levels are listed from
# "very inactive" to "very active", but the factor is left unordered here.
train <- data.frame(
  ClaimID = c(1, 2, 3, 4, 5),
  Activity = factor(
    x = c(
      "active", "very active", "very active", "inactive", "very inactive"
    ),
    levels = c("very inactive", "inactive", "active", "very active")
  ),
  Fraud = c(FALSE, TRUE, TRUE, FALSE, TRUE)
)
train
## ClaimID Activity Fraud
## 1 1 active FALSE
## 2 2 very active TRUE
## 3 3 very active TRUE
## 4 4 inactive FALSE
## 5 5 very inactive TRUE
# Same five claims, but Activity is now an ordered factor running from
# "very inactive" up to "very active". The argument is spelled out in full
# as `ordered`; the abbreviation `order` only works through R's partial
# argument matching.
train <- data.frame(
  ClaimID = c(1, 2, 3, 4, 5),
  Activity = factor(
    x = c(
      "active", "very active", "very active", "inactive", "very inactive"
    ),
    levels = c("very inactive", "inactive", "active", "very active"),
    ordered = TRUE
  ),
  Fraud = c(FALSE, TRUE, TRUE, FALSE, TRUE)
)
train
## ClaimID Activity Fraud
## 1 1 active FALSE
## 2 2 very active TRUE
## 3 3 very active TRUE
## 4 4 inactive FALSE
## 5 5 very inactive TRUE
# Fit a classification tree that uses only the Activity factor to predict
# Fraud, with the relaxed stopping rules, and plot it.
mytree <- rpart(
  formula = Fraud ~ Activity,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
fancyRpartPlot(mytree, caption = NULL)
# Ten claims combining all three predictors: the two logical flags plus the
# ordered Activity factor.
train <- data.frame(
  ClaimID = 1:10,
  RearEnd = c(
    TRUE, TRUE, TRUE, FALSE, FALSE,
    FALSE, FALSE, TRUE, TRUE, FALSE
  ),
  Whiplash = c(
    TRUE, TRUE, TRUE, TRUE, TRUE,
    FALSE, FALSE, FALSE, FALSE, TRUE
  ),
  Activity = factor(
    x = c(
      "active", "very active", "very active", "inactive", "very inactive",
      "inactive", "very inactive", "active", "active", "very active"
    ),
    levels = c("very inactive", "inactive", "active", "very active"),
    ordered = TRUE
  ),
  Fraud = c(
    FALSE, TRUE, TRUE, FALSE, FALSE,
    TRUE, TRUE, FALSE, FALSE, TRUE
  )
)
train
## ClaimID RearEnd Whiplash Activity Fraud
## 1 1 TRUE TRUE active FALSE
## 2 2 TRUE TRUE very active TRUE
## 3 3 TRUE TRUE very active TRUE
## 4 4 FALSE TRUE inactive FALSE
## 5 5 FALSE TRUE very inactive FALSE
## 6 6 FALSE FALSE inactive TRUE
## 7 7 FALSE FALSE very inactive TRUE
## 8 8 TRUE FALSE active FALSE
## 9 9 TRUE FALSE active FALSE
## 10 10 FALSE TRUE very active TRUE
# Grow the tree on all three predictors with cp = -1 and the relaxed
# stopping rules, plot it, and list the components stored in the fitted
# rpart object.
mytree <- rpart(
  formula = Fraud ~ RearEnd + Whiplash + Activity,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1,
  cp = -1
)
fancyRpartPlot(mytree, caption = NULL)
names(mytree)
## [1] "frame" "where" "call"
## [4] "terms" "cptable" "method"
## [7] "parms" "control" "functions"
## [10] "numresp" "splits" "csplit"
## [13] "variable.importance" "y" "ordered"
# Importance score of each predictor as stored on the fitted tree.
mytree[["variable.importance"]]
## Activity Whiplash RearEnd
## 3.0000000 2.0000000 0.8571429
# Fitting method recorded on the tree object.
mytree[["method"]]
## [1] "class"
# Same value again, printed explicitly instead of through auto-printing.
print(mytree[["method"]])
## [1] "class"
# Show the complexity-parameter table (CP, number of splits, errors) that
# guides the choice of a pruning threshold.
printcp(x = mytree)
##
## Classification tree:
## rpart(formula = Fraud ~ RearEnd + Whiplash + Activity, data = train,
## method = "class", minsplit = 2, minbucket = 1, cp = -1)
##
## Variables actually used in tree construction:
## [1] Activity RearEnd Whiplash
##
## Root node error: 5/10 = 0.5
##
## n= 10
##
## CP nsplit rel error xerror xstd
## 1 0.6 0 1.0 2.0 0.00000
## 2 0.2 1 0.4 0.4 0.25298
## 3 -1.0 3 0.0 0.4 0.25298
# Prune the fully grown tree at cp = 0.21. In the printcp() table the
# one-split tree is listed at CP 0.2, so this threshold is meant to cut the
# tree back to that first split - NOTE(review): confirm against the plot.
mytree <- prune(mytree, cp = 0.21)
# Pass caption = NULL like every other fancyRpartPlot() call in this
# document, so the final plot is not the only one showing the default caption.
fancyRpartPlot(mytree, caption = NULL)