R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics (min, quartiles, median, mean, max) for
# the `speed` and `dist` columns of `cars`. `cars` is not defined in this
# document -- presumably R's built-in datasets::cars; confirm.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

# Toy training set: three claims, one logical predictor (RearEnd) and the
# logical target (Fraud). Here Fraud is identical to RearEnd.
train <- data.frame(
  ClaimID = c(1, 2, 3),
  RearEnd = c(TRUE, FALSE, TRUE),
  Fraud = c(TRUE, FALSE, TRUE)
)
train
##   ClaimID RearEnd Fraud
## 1       1    TRUE  TRUE
## 2       2   FALSE FALSE
## 3       3    TRUE  TRUE
library(rpart)
## Warning: package 'rpart' was built under R version 4.3.3

# First attempt: classification tree with rpart's default control settings.
# As the printout shows, the tree stays at the root node (no split is made).
mytree <- rpart(Fraud ~ RearEnd, data = train, method = "class")
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 TRUE (0.3333333 0.6666667) *

# Second attempt: allow a node with only 2 rows to be split and a leaf to
# hold a single row, so that this 3-row dataset can actually be partitioned.
mytree <- rpart(
  Fraud ~ RearEnd,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 TRUE (0.3333333 0.6666667)  
##   2) RearEnd< 0.5 1 0 FALSE (1.0000000 0.0000000) *
##   3) RearEnd>=0.5 2 0 TRUE (0.0000000 1.0000000) *

# Plotting packages, loaded just before the first plot.
library(rattle)
## Warning: package 'rattle' was built under R version 4.3.3
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.3
library(RColorBrewer)

# Draw the fitted tree, with the caption turned off.
fancyRpartPlot(mytree, caption = NULL)

# Refit the same model, this time asking rpart to choose splits with the
# "information" criterion instead of its default one.
mytree <- rpart(
  Fraud ~ RearEnd,
  data = train,
  method = "class",
  parms = list(split = "information"),
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 TRUE (0.3333333 0.6666667)  
##   2) RearEnd< 0.5 1 0 FALSE (1.0000000 0.0000000) *
##   3) RearEnd>=0.5 2 0 TRUE (0.0000000 1.0000000) *

# Draw the refitted tree, again without a caption.
fancyRpartPlot(mytree, caption = NULL)

# Changed dataset: claim 3 is now labelled non-fraud, so the two RearEnd = TRUE
# claims carry different labels. Neither fit below grows beyond the root node.
train <- data.frame(
  ClaimID = c(1, 2, 3),
  RearEnd = c(TRUE, FALSE, TRUE),
  Fraud = c(TRUE, FALSE, FALSE)
)
train
##   ClaimID RearEnd Fraud
## 1       1    TRUE  TRUE
## 2       2   FALSE FALSE
## 3       3    TRUE FALSE

# Fit with rpart's default control settings: root node only.
mytree <- rpart(Fraud ~ RearEnd, data = train, method = "class")
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 FALSE (0.6666667 0.3333333) *

# Relaxing minsplit/minbucket is not enough here: still root node only.
mytree <- rpart(
  Fraud ~ RearEnd,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 FALSE (0.6666667 0.3333333) *

# Plot call left disabled in the original (the tree has no splits to draw).
# fancyRpartPlot(mytree, caption = NULL)

# Try the "information" splitting criterion on the changed dataset.
# The printout shows the tree still consists of the root node alone.
mytree <- rpart(
  Fraud ~ RearEnd,
  data = train,
  method = "class",
  parms = list(split = "information"),
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 FALSE (0.6666667 0.3333333) *

# Plot call left disabled in the original (the tree has no splits to draw).
# fancyRpartPlot(mytree, caption = NULL)


# Same changed dataset again (no tree was grown for it so far); this time the
# complexity parameter `cp` is added to the rpart call.
train <- data.frame(
  ClaimID = c(1, 2, 3),
  RearEnd = c(TRUE, FALSE, TRUE),
  Fraud = c(TRUE, FALSE, FALSE)
)
train
##   ClaimID RearEnd Fraud
## 1       1    TRUE  TRUE
## 2       2   FALSE FALSE
## 3       3    TRUE FALSE

# cp = -1 makes rpart keep the split (see the printout below). We normally
# do not want to do this, because it may produce an overly complex tree.
mytree <- rpart(
  Fraud ~ RearEnd,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1,
  cp = -1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 FALSE (0.6666667 0.3333333)  
##   2) RearEnd< 0.5 1 0 FALSE (1.0000000 0.0000000) *
##   3) RearEnd>=0.5 2 1 FALSE (0.5000000 0.5000000) *

# Plot call left disabled in the original.
# fancyRpartPlot(mytree, caption = NULL)

# New dataset: seven claims with two logical predictors (RearEnd, Whiplash)
# and the logical Fraud target.
train <- data.frame(
  ClaimID = 1:7,
  RearEnd = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE),
  Whiplash = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE),
  Fraud = c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE)
)
train
##   ClaimID RearEnd Whiplash Fraud
## 1       1    TRUE     TRUE  TRUE
## 2       2    TRUE     TRUE  TRUE
## 3       3   FALSE     TRUE  TRUE
## 4       4   FALSE     TRUE FALSE
## 5       5   FALSE     TRUE FALSE
## 6       6   FALSE    FALSE FALSE
## 7       7   FALSE    FALSE FALSE

# maxdepth = 1 restricts the tree to a single split below the root.
# With both predictors available, the split chosen is on RearEnd.
mytree <- rpart(
  Fraud ~ RearEnd + Whiplash,
  data = train,
  method = "class",
  maxdepth = 1,
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 7 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 7 3 FALSE (0.5714286 0.4285714)  
##   2) RearEnd< 0.5 5 1 FALSE (0.8000000 0.2000000) *
##   3) RearEnd>=0.5 2 0 TRUE (0.0000000 1.0000000) *

# Plot call left disabled in the original.
# fancyRpartPlot(mytree, caption = NULL)

# 2 x 2 loss matrix, filled row by row: zero cost on the diagonal, and
# off-diagonal costs of 1 and 3, so one kind of misclassification is penalised
# three times as heavily as the other.
# NOTE(review): which error gets the cost of 3 depends on rpart's row/column
# convention for `loss` -- confirm against the rpart documentation.
lossmatrix <- matrix(c(0, 1, 3, 0), nrow = 2, byrow = TRUE)
lossmatrix
##      [,1] [,2]
## [1,]    0    1
## [2,]    3    0

# Refit the depth-1 tree using the loss matrix. The printout shows that the
# split is now made on Whiplash rather than RearEnd.
mytree <- rpart(
  Fraud ~ RearEnd + Whiplash,
  data = train,
  method = "class",
  maxdepth = 1,
  minsplit = 2,
  minbucket = 1,
  parms = list(loss = lossmatrix)
)
mytree
## n= 7 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 7 4 TRUE (0.5714286 0.4285714)  
##   2) Whiplash< 0.5 2 0 FALSE (1.0000000 0.0000000) *
##   3) Whiplash>=0.5 5 2 TRUE (0.4000000 0.6000000) *

# Plot call left disabled in the original.
# fancyRpartPlot(mytree, caption = NULL)

# New dataset: five claims with a four-level categorical predictor (Activity).
# The factor is given explicit levels but is NOT declared as ordered.
train <- data.frame(
  ClaimID = c(1, 2, 3, 4, 5),
  Activity = factor(
    x = c("Active", "very Active", "very Active", "inActive", "very inActive"),
    levels = c("very inActive", "inActive", "Active", "very Active")
  ),
  Fraud = c(FALSE, TRUE, TRUE, FALSE, TRUE)
)
train
##   ClaimID      Activity Fraud
## 1       1        Active FALSE
## 2       2   very Active  TRUE
## 3       3   very Active  TRUE
## 4       4      inActive FALSE
## 5       5 very inActive  TRUE

# With an unordered factor the levels can be grouped freely: the single split
# below separates {inActive, Active} from {very inActive, very Active}.
mytree <- rpart(
  Fraud ~ Activity,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 5 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 5 2 TRUE (0.4000000 0.6000000)  
##   2) Activity=inActive,Active 2 0 FALSE (1.0000000 0.0000000) *
##   3) Activity=very inActive,very Active 3 0 TRUE (0.0000000 1.0000000) *

# Plot call left disabled in the original.
# fancyRpartPlot(mytree, caption = NULL)


# Same five claims, but Activity is now an ORDERED factor
# (very inActive < inActive < Active < very Active).
# The argument is spelled out as `ordered = TRUE`; the original `order=TRUE`
# only worked through partial argument matching against `ordered`.
train <- data.frame(
  ClaimID = c(1, 2, 3, 4, 5),
  Activity = factor(
    x = c("Active", "very Active", "very Active", "inActive", "very inActive"),
    levels = c("very inActive", "inActive", "Active", "very Active"),
    ordered = TRUE
  ),
  Fraud = c(FALSE, TRUE, TRUE, FALSE, TRUE)
)
train
##   ClaimID      Activity Fraud
## 1       1        Active FALSE
## 2       2   very Active  TRUE
## 3       3   very Active  TRUE
## 4       4      inActive FALSE
## 5       5 very inActive  TRUE

# With the ordered factor the fit needs two splits (five nodes in the printout
# below) instead of the single split obtained with the unordered factor.
mytree <- rpart(
  Fraud ~ Activity,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 5 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 5 2 TRUE (0.4000000 0.6000000)  
##   2) Activity=very inActive,inActive,Active 3 1 FALSE (0.6666667 0.3333333)  
##     4) Activity=inActive,Active,very Active 2 0 FALSE (1.0000000 0.0000000) *
##     5) Activity=very inActive 1 0 TRUE (0.0000000 1.0000000) *
##   3) Activity=very Active 2 0 TRUE (0.0000000 1.0000000) *

# Plot call left disabled in the original.
# fancyRpartPlot(mytree, caption = NULL)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.