library(rpart)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(carData)
library(reshape2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.4     v purrr   0.3.4
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x purrr::lift()   masks caret::lift()
## x car::recode()   masks dplyr::recode()
## x purrr::some()   masks car::some()
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
# Load dataset ----
# Work on a copy of the built-in iris data so the original stays untouched.
IR <- iris
# View() opens a data viewer window; it is only useful (and only reliably
# available) in an interactive session, so skip it in batch runs.
if (interactive()) {
  View(IR)
}

# NOTE: the former attach(IR) call was dropped. The code visible in this script
# never refers to bare column names (models use `data =`, everything else uses
# `IR$...`), and attach() only adds name-masking hazards on the search path.
names(IR)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
summary(IR)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Long-format copy of the four measurement columns (column 5, Species, is
# dropped). With no id variables given, melt() treats every remaining column
# as a measure variable, as the recorded message below confirms.
df3 <- melt(as.data.frame(IR[, -5]))
## No id variables; using all as measure variables
########################### CART METHOD ########################################

# Fit a classification tree for Species on all other columns, using every row
# of IR (no hold-out set at this stage).
fit <- rpart(Species ~ ., data = IR)

# Complexity-parameter table: one row per candidate tree size, with the
# cross-validated error (xerror) for each.
printcp(fit)
## 
## Classification tree:
## rpart(formula = Species ~ ., data = IR)
## 
## Variables actually used in tree construction:
## [1] Petal.Length Petal.Width 
## 
## Root node error: 100/150 = 0.66667
## 
## n= 150 
## 
##     CP nsplit rel error xerror     xstd
## 1 0.50      0      1.00   1.19 0.049592
## 2 0.44      1      0.50   0.71 0.061150
## 3 0.01      2      0.06   0.09 0.029086
fit
## n= 150 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 150 100 setosa (0.33333333 0.33333333 0.33333333)  
##   2) Petal.Length< 2.45 50   0 setosa (1.00000000 0.00000000 0.00000000) *
##   3) Petal.Length>=2.45 100  50 versicolor (0.00000000 0.50000000 0.50000000)  
##     6) Petal.Width< 1.75 54   5 versicolor (0.00000000 0.90740741 0.09259259) *
##     7) Petal.Width>=1.75 46   1 virginica (0.00000000 0.02173913 0.97826087) *
# NOTE: no pruning is performed in this script; `fit` is used as fitted.
# In the table recorded above, the row with the lowest xerror is the largest
# tree (CP = 0.01), so pruning at that CP would presumably change nothing.
# Plot the model
plot(fit)
# `pos` must be one of 1, 2, 3, 4 (below, left, above, right); the previous
# value 2.5 is not a documented position, so use 2 (labels left of the node).
text(fit, cex = 0.9, xpd = TRUE, pos = 2)

# Predict on the same rows the tree was fitted on (resubstitution, not a
# held-out test set) and place the predictions next to the original data.
pred <- predict(fit, IR, type = "class")
p_frame <- data.frame(IR, pred)
# Only open the data viewer when someone is there to look at it.
if (interactive()) {
  View(p_frame)
}

sample_n(IR, 4)          # Data inspection: four random rows
##   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1          4.4         3.0          1.3         0.2     setosa
## 2          5.7         2.6          3.5         1.0 versicolor
## 3          6.8         2.8          4.8         1.4 versicolor
## 4          4.4         2.9          1.4         0.2     setosa
# Training and testing split ----
# Seed the RNG BEFORE drawing the partition. Previously set.seed() was called
# only after createDataPartition(), so the train/test split (and everything
# derived from it) changed on every run.
set.seed(123)
# 80% of the row indices go to training; the remainder is held out.
training_samples <- IR$Species %>%
  createDataPartition(p = 0.8, list = FALSE)

train.data <- IR[training_samples, ]
test.data <- IR[-training_samples, ]

# Build model: classification tree fitted on the training rows only.
fit1 <- rpart(Species ~ ., data = train.data, method = "class")
fit1
# NOTE: the output below was recorded from an earlier run, made when the
# partition was not yet seeded; split points and counts may differ slightly.
## n= 120 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 120 80 setosa (0.33333333 0.33333333 0.33333333)  
##   2) Petal.Length< 2.6 40  0 setosa (1.00000000 0.00000000 0.00000000) *
##   3) Petal.Length>=2.6 80 40 versicolor (0.00000000 0.50000000 0.50000000)  
##     6) Petal.Length< 4.75 37  1 versicolor (0.00000000 0.97297297 0.02702703) *
##     7) Petal.Length>=4.75 43  4 virginica (0.00000000 0.09302326 0.90697674) *
# xpd = NA lets the tree labels be drawn outside the plot region instead of
# being clipped. This changes the graphics state for later plots as well.
par(xpd = NA)
plot(fit1)
text(fit1, digits = 3)

# Classify the held-out rows with the tree fitted on the training split.
predicted_class <- predict(fit1, newdata = test.data, type = "class")
head(predicted_class)
##      1      8      9     10     15     22 
## setosa setosa setosa setosa setosa setosa 
## Levels: setosa versicolor virginica
# Accuracy: share of held-out rows whose predicted label equals the true one.
mean(test.data$Species == predicted_class)
## [1] 0.9333333
# In the run recorded above, the overall accuracy of the model was 93.33%.