# Toy training set: three insurance claims, one predictor (rearend) and
# the class label (fraud).
train <- data.frame(
  claimID = c(1, 2, 3),
  rearend = c(TRUE, FALSE, TRUE),
  fraud   = c(TRUE, FALSE, TRUE)
)
train
##   claimID rearend fraud
## 1       1    TRUE  TRUE
## 2       2   FALSE FALSE
## 3       3    TRUE  TRUE
library(rpart)
# Predict whether a claim is fraudulent from the rearend attribute.
# Further predictors are added to the formula with `+` (e.g. a + b).
# This is a classification problem, so method = "class"; to predict a
# numeric value instead, set method = "anova".
mytree <- rpart(
  formula = fraud ~ rearend,
  data = train,
  method = "class"
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 TRUE (0.3333333 0.6666667) *
# Refit with relaxed control settings: minsplit = 2 lets a node with only
# two observations be considered for a split, and minbucket = 1 allows a
# leaf to hold a single observation.
mytree <- rpart(
  formula = fraud ~ rearend,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 TRUE (0.3333333 0.6666667)  
##   2) rearend< 0.5 1 0 FALSE (1.0000000 0.0000000) *
##   3) rearend>=0.5 2 0 TRUE (0.0000000 1.0000000) *
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(RColorBrewer)
# Draw the current tree with rattle's plotting helper.
fancyRpartPlot(model = mytree, caption = NULL)

# Same model, but choose splits by information gain (parms$split =
# "information") rather than rpart's default splitting index.
mytree <- rpart(
  formula = fraud ~ rearend,
  data = train,
  method = "class",
  parms = list(split = "information"),
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 TRUE (0.3333333 0.6666667)  
##   2) rearend< 0.5 1 0 FALSE (1.0000000 0.0000000) *
##   3) rearend>=0.5 2 0 TRUE (0.0000000 1.0000000) *
# Plot the tree grown with the information-gain criterion.
fancyRpartPlot(model = mytree, caption = NULL)

# Second toy set: claim 3 is now a rear-end claim that is NOT fraud, so
# rearend no longer separates the classes perfectly.
train <- data.frame(
  claimID = c(1, 2, 3),
  rearend = c(TRUE, FALSE, TRUE),
  fraud   = c(TRUE, FALSE, FALSE)
)
train
##   claimID rearend fraud
## 1       1    TRUE  TRUE
## 2       2   FALSE FALSE
## 3       3    TRUE FALSE
# Fit on the noisier data. cp = -1 is a negative complexity parameter, so
# splits are kept even when they do not reduce the misclassification loss.
mytree <- rpart(
  formula = fraud ~ rearend,
  data = train,
  method = "class",
  parms = list(split = "information"),
  minsplit = 2,
  minbucket = 1,
  cp = -1
)
mytree
## n= 3 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 3 1 FALSE (0.6666667 0.3333333)  
##   2) rearend< 0.5 1 0 FALSE (1.0000000 0.0000000) *
##   3) rearend>=0.5 2 1 FALSE (0.5000000 0.5000000) *
# Seven claims with two logical predictors (rearend, whiplash) and the
# fraud label.
train <- data.frame(
  claimID  = 1:7,
  rearend  = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE),
  whiplash = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE),
  fraud    = c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE)
)
train
##   claimID rearend whiplash fraud
## 1       1    TRUE     TRUE  TRUE
## 2       2    TRUE     TRUE  TRUE
## 3       3   FALSE     TRUE  TRUE
## 4       4   FALSE     TRUE FALSE
## 5       5   FALSE     TRUE FALSE
## 6       6   FALSE    FALSE FALSE
## 7       7   FALSE    FALSE FALSE
# Two candidate predictors, but maxdepth = 1 restricts the tree to a
# single split, so rpart has to pick one of them.
mytree <- rpart(
  formula = fraud ~ rearend + whiplash,
  data = train,
  method = "class",
  maxdepth = 1,
  minsplit = 2,
  minbucket = 1
)
mytree
## n= 7 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 7 3 FALSE (0.5714286 0.4285714)  
##   2) rearend< 0.5 5 1 FALSE (0.8000000 0.2000000) *
##   3) rearend>=0.5 2 0 TRUE (0.0000000 1.0000000) *
# Plot the depth-one tree.
fancyRpartPlot(model = mytree, caption = NULL)

# 2 x 2 misclassification-cost matrix, written row by row: zero on the
# diagonal, cost 1 in cell [1, 2] and cost 3 in cell [2, 1].
# NOTE(review): rpart is understood to read rows as the actual class and
# columns as the predicted class -- confirm against the rpart docs.
lossmatrix <- rbind(
  c(0, 1),
  c(3, 0)
)
lossmatrix
##      [,1] [,2]
## [1,]    0    1
## [2,]    3    0
# Refit the depth-one tree, this time supplying the asymmetric loss
# matrix through parms$loss, and plot the result.
mytree <- rpart(
  formula = fraud ~ rearend + whiplash,
  data = train,
  method = "class",
  maxdepth = 1,
  minsplit = 2,
  minbucket = 1,
  parms = list(loss = lossmatrix)
)
fancyRpartPlot(model = mytree, caption = NULL)

# Five claims with an ordinal predictor. `activity` is an ordered factor
# whose levels run from "very inactive" up to "very active".
# The argument is spelled `ordered = TRUE` in full instead of relying on
# partial matching of `order`.
train <- data.frame(
  claimid = c(1, 2, 3, 4, 5),
  activity = factor(
    x = c("active", "very active", "very active", "inactive", "very inactive"),
    levels = c("very inactive", "inactive", "active", "very active"),
    ordered = TRUE
  ),
  fraud = c(FALSE, TRUE, TRUE, FALSE, TRUE)
)
train
##   claimid      activity fraud
## 1       1        active FALSE
## 2       2   very active  TRUE
## 3       3   very active  TRUE
## 4       4      inactive FALSE
## 5       5 very inactive  TRUE
# Fit a classification tree on the single `activity` predictor and plot it.
mytree <- rpart(
  formula = fraud ~ activity,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1
)
fancyRpartPlot(model = mytree, caption = NULL)

# Ten claims combining both logical predictors with the ordinal
# `activity` factor (levels "very inactive" < ... < "very active").
# The argument is spelled `ordered = TRUE` in full instead of relying on
# partial matching of `order`.
train <- data.frame(
  claimID = 1:10,
  rearend = c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE),
  whiplash = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE),
  activity = factor(
    x = c(
      "active", "very active", "very active", "inactive", "very inactive",
      "inactive", "very inactive", "active", "active", "very active"
    ),
    levels = c("very inactive", "inactive", "active", "very active"),
    ordered = TRUE
  ),
  fraud = c(FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
)
train
##    claimID rearend whiplash      activity fraud
## 1        1    TRUE     TRUE        active FALSE
## 2        2    TRUE     TRUE   very active  TRUE
## 3        3    TRUE     TRUE   very active  TRUE
## 4        4   FALSE     TRUE      inactive FALSE
## 5        5   FALSE     TRUE very inactive FALSE
## 6        6   FALSE    FALSE      inactive  TRUE
## 7        7   FALSE    FALSE very inactive  TRUE
## 8        8    TRUE    FALSE        active FALSE
## 9        9    TRUE    FALSE        active FALSE
## 10      10   FALSE     TRUE   very active  TRUE
# Grow a full tree: all three predictors, the smallest allowed node
# sizes, and a negative cp so no split is discarded for lack of improvement.
mytree <- rpart(
  formula = fraud ~ rearend + whiplash + activity,
  data = train,
  method = "class",
  minsplit = 2,
  minbucket = 1,
  cp = -1
)
fancyRpartPlot(model = mytree, caption = NULL)

# Inspect the fitted rpart object: it is a plain list underneath.
typeof(mytree)
## [1] "list"
# The fitting method recorded on the object.
print(mytree$method)
## [1] "class"
# Components available on the object. There is no `finalmodel` entry, so
# the earlier `mytree$finalmodel` lookup (which only ever printed NULL)
# has been dropped.
names(mytree)
##  [1] "frame"               "where"               "call"               
##  [4] "terms"               "cptable"             "method"             
##  [7] "parms"               "control"             "functions"          
## [10] "numresp"             "splits"              "csplit"             
## [13] "variable.importance" "y"                   "ordered"
# Importance score per predictor, largest first.
mytree$variable.importance
##  activity  whiplash   rearend 
## 3.0000000 2.0000000 0.8571429
# Complexity-parameter table, used to choose a pruning threshold.
printcp(mytree)
## 
## Classification tree:
## rpart(formula = fraud ~ rearend + whiplash + activity, data = train, 
##     method = "class", minsplit = 2, minbucket = 1, cp = -1)
## 
## Variables actually used in tree construction:
## [1] activity rearend  whiplash
## 
## Root node error: 5/10 = 0.5
## 
## n= 10 
## 
##     CP nsplit rel error xerror    xstd
## 1  0.6      0       1.0    2.0 0.00000
## 2  0.2      1       0.4    0.4 0.25298
## 3 -1.0      3       0.0    0.4 0.25298
# Prune the full tree at cp = 0.21 (just above the 0.2 row of the CP
# table printed by printcp) and plot the pruned result.
mytree <- prune(mytree, cp = 0.21)
fancyRpartPlot(model = mytree)

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.