# Read the weather data set from CSV into a data frame (tibble).
library(readr)
weather <- read_csv("weather.csv")
## Parsed with column specification:
## cols(
## outlook = col_character(),
## temperature = col_double(),
## humidity = col_double(),
## windy = col_logical(),
## play = col_character()
## )
# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; skip it when the script is run in batch mode.
if (interactive()) {
  View(weather)
}
# Quick sanity checks: per-column summary and the table dimensions.
summary(weather)
## outlook temperature humidity windy
## Length:14 Min. :64.00 Min. :65.00 Mode :logical
## Class :character 1st Qu.:69.25 1st Qu.:71.25 FALSE:8
## Mode :character Median :72.00 Median :82.50 TRUE :6
## Mean :73.57 Mean :81.64
## 3rd Qu.:78.75 3rd Qu.:90.00
## Max. :85.00 Max. :96.00
## play
## Length:14
## Class :character
## Mode :character
##
##
##
dim(weather)
## [1] 14 5
# Randomly split the rows into a training set and a 2-row held-out test set.
# The seed is fixed so the split (and everything derived from it) is
# reproducible. The outputs recorded as `##` comments in this file were
# captured from an earlier, unseeded run, so the exact rows, tree and scores
# may differ from what this seed produces.
set.seed(42)
# Derive the sample size from the data instead of hard-coding 14 rows, so the
# split stays valid if the CSV grows or shrinks; 2 rows are kept for testing.
s <- sample(nrow(weather), nrow(weather) - 2)
w_train <- weather[s, ]
w_test <- weather[-s, ]
dim(w_train)
## [1] 12 5
dim(w_test)
## [1] 2 5
# Grow a classification tree that predicts `play` from all remaining columns.
# minsplit = 2 allows a split to be attempted on any node with at least two
# observations, which lets the tree grow fully on this tiny training set.
library(rpart)
dtm <- rpart(
  formula = play ~ .,
  data = w_train,
  method = "class",
  control = rpart.control(minsplit = 2)
)
print(dtm)
## n= 12
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 12 4 yes (0.3333333 0.6666667)
## 2) outlook=sunny 5 2 no (0.6000000 0.4000000)
## 4) humidity>=77.5 3 0 no (1.0000000 0.0000000) *
## 5) humidity< 77.5 2 0 yes (0.0000000 1.0000000) *
## 3) outlook=overcast,rainy 7 1 yes (0.1428571 0.8571429)
## 6) temperature< 66.5 1 0 no (1.0000000 0.0000000) *
## 7) temperature>=66.5 6 0 yes (0.0000000 1.0000000) *
# Draw the fitted tree. type = 4 labels every node and both branches of each
# split; extra = 102 shows correct/total counts per node plus the percentage
# of observations that reach it.
library(rpart.plot)
rpart.plot(x = dtm, type = 4, extra = 102)
# On the training data the decision tree classifies every sample correctly (100% training accuracy): each leaf node in the figure above is pure.
# Predict the class of each held-out row and cross-tabulate the predictions
# against the true labels (rows = actual, columns = predicted).
p <- predict(dtm, w_test, type = "class")
table(w_test$play, p)
## p
## no yes
## no 0 1
## yes 1 0
# Inspect the types: predictions are a factor, the true labels are character.
str(p)
## Factor w/ 2 levels "no","yes": 1 2
## - attr(*, "names")= chr [1:2] "1" "2"
str(w_test$play)
## chr [1:2] "yes" "no"
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Build the reference factor with the same levels as the predictions. With
# only two test rows the held-out labels can easily all belong to one class;
# a bare as.factor() would then give a single-level factor, and
# confusionMatrix() rejects data that has more levels than the reference.
confusionMatrix(p, factor(w_test$play, levels = levels(p)))
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 0 1
## yes 1 0
##
## Accuracy : 0
## 95% CI : (0, 0.8419)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 1
##
## Kappa : -1
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.0
## Specificity : 0.0
## Pos Pred Value : 0.0
## Neg Pred Value : 0.0
## Prevalence : 0.5
## Detection Rate : 0.0
## Detection Prevalence : 0.5
## Balanced Accuracy : 0.0
##
## 'Positive' Class : no
##