Case study: Play Tennis (weather data set)

# Load the weather data set from CSV into a data frame
library(readr)
weather <- read_csv(file = "weather.csv")
## Parsed with column specification:
## cols(
##   outlook = col_character(),
##   temperature = col_double(),
##   humidity = col_double(),
##   windy = col_logical(),
##   play = col_character()
## )
# Open the spreadsheet-style viewer only in interactive sessions: View() needs
# a data-viewer front end and may fail when the script runs in batch mode.
if (interactive()) {
  View(weather)
}
# Per-column summary of the data set
summary(weather)
##    outlook           temperature       humidity       windy        
##  Length:14          Min.   :64.00   Min.   :65.00   Mode :logical  
##  Class :character   1st Qu.:69.25   1st Qu.:71.25   FALSE:8        
##  Mode  :character   Median :72.00   Median :82.50   TRUE :6        
##                     Mean   :73.57   Mean   :81.64                  
##                     3rd Qu.:78.75   3rd Qu.:90.00                  
##                     Max.   :85.00   Max.   :96.00                  
##      play          
##  Length:14         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
dim(weather)
## [1] 14  5
# Hold out 2 rows for testing and train on the remaining rows. The row count is
# taken from the data instead of being hard-coded, and the seed makes the
# random split repeatable from run to run.
# NOTE(review): the tree and confusion-matrix output recorded further down came
# from an earlier unseeded run, so it may not match the split produced here.
set.seed(123)
n_rows <- nrow(weather)
n_test <- 2L
s <- sample(n_rows, n_rows - n_test)
w_train <- weather[s, ]
w_test <- weather[-s, ]
dim(w_train)
## [1] 12  5
dim(w_test)
## [1] 2 5
library("rpart")
# Fit a classification tree for `play`, using every other column of the
# training rows as a candidate predictor. minsplit = 2 lowers the minimum node
# size at which rpart will still attempt a split.
dtm <- rpart(
  formula = play ~ .,
  data = w_train,
  method = "class",
  control = rpart.control(minsplit = 2)
)
# Show the fitted tree's node listing
print(dtm)
## n= 12 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 12 4 yes (0.3333333 0.6666667)  
##   2) outlook=sunny 5 2 no (0.6000000 0.4000000)  
##     4) humidity>=77.5 3 0 no (1.0000000 0.0000000) *
##     5) humidity< 77.5 2 0 yes (0.0000000 1.0000000) *
##   3) outlook=overcast,rainy 7 1 yes (0.1428571 0.8571429)  
##     6) temperature< 66.5 1 0 no (1.0000000 0.0000000) *
##     7) temperature>=66.5 6 0 yes (0.0000000 1.0000000) *
library("rpart.plot")
# Draw the fitted tree; `type` and `extra` select the node-labelling layout
# (see ?rpart.plot for the meaning of the codes 4 and 102).
rpart.plot(x = dtm, type = 4, extra = 102)

# In the training phase, the decision tree classifies every training sample correctly (100% accuracy; see the plot above).

# Predict a class label for each held-out row, then cross-tabulate the actual
# labels (rows) against the predicted labels (columns).
p <- predict(object = dtm, newdata = w_test, type = "class")
table(w_test[["play"]], p)
##      p
##       no yes
##   no   0   1
##   yes  1   0
# Compare the structure of the predictions with that of the actual labels
str(object = p)
##  Factor w/ 2 levels "no","yes": 1 2
##  - attr(*, "names")= chr [1:2] "1" "2"
str(object = w_test[["play"]])
##  chr [1:2] "yes" "no"
library("caret")
## Loading required package: lattice
## Loading required package: ggplot2
# Build the reference factor with the same levels as the predictions. With a
# test set this small, as.factor() alone would drop a class that happens to be
# absent from w_test, and confusionMatrix() rejects a reference that has fewer
# levels than the predictions.
confusionMatrix(p, factor(w_test$play, levels = levels(p)))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction no yes
##        no   0   1
##        yes  1   0
##                                      
##                Accuracy : 0          
##                  95% CI : (0, 0.8419)
##     No Information Rate : 0.5        
##     P-Value [Acc > NIR] : 1          
##                                      
##                   Kappa : -1         
##                                      
##  Mcnemar's Test P-Value : 1          
##                                      
##             Sensitivity : 0.0        
##             Specificity : 0.0        
##          Pos Pred Value : 0.0        
##          Neg Pred Value : 0.0        
##              Prevalence : 0.5        
##          Detection Rate : 0.0        
##    Detection Prevalence : 0.5        
##       Balanced Accuracy : 0.0        
##                                      
##        'Positive' Class : no         
##