##Understanding Decision Tree using confusion matrix
## Courtesy : http://rpubs.com/newajay/219102
## Uses the `weather` dataset that is loaded below via the rattle package
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
library(rattle)
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
## Load the `weather` data frame into the workspace and list its column names.
data(list = "weather")
colnames(weather)
## [1] "Date" "Location" "MinTemp" "MaxTemp"
## [5] "Rainfall" "Evaporation" "Sunshine" "WindGustDir"
## [9] "WindGustSpeed" "WindDir9am" "WindDir3pm" "WindSpeed9am"
## [13] "WindSpeed3pm" "Humidity9am" "Humidity3pm" "Pressure9am"
## [17] "Pressure3pm" "Cloud9am" "Cloud3pm" "Temp9am"
## [21] "Temp3pm" "RainToday" "RISK_MM" "RainTomorrow"
##Model in line with rattle
## Conditional inference tree for RainTomorrow using the three predictors in
## the formula; the fitted tree is then drawn and its split rules printed.
fit2 <- ctree(
  formula = RainTomorrow ~ Pressure3pm + Cloud3pm + Sunshine,
  data = weather
)
plot(fit2)

print(fit2)
##
## Conditional inference tree with 8 terminal nodes
##
## Response: RainTomorrow
## Inputs: Pressure3pm, Cloud3pm, Sunshine
## Number of observations: 366
##
## 1) Cloud3pm <= 6; criterion = 1, statistic = 54.954
## 2) Pressure3pm <= 1011.8; criterion = 1, statistic = 32.745
## 3) Sunshine <= 8.4; criterion = 0.998, statistic = 11.237
## 4)* weights = 12
## 3) Sunshine > 8.4
## 5)* weights = 33
## 2) Pressure3pm > 1011.8
## 6) Sunshine <= 6.1; criterion = 0.957, statistic = 5.982
## 7)* weights = 20
## 6) Sunshine > 6.1
## 8)* weights = 190
## 1) Cloud3pm > 6
## 9) Pressure3pm <= 1016; criterion = 0.999, statistic = 13.894
## 10) Sunshine <= 9.2; criterion = 0.969, statistic = 6.561
## 11)* weights = 52
## 10) Sunshine > 9.2
## 12)* weights = 7
## 9) Pressure3pm > 1016
## 13) Cloud3pm <= 7; criterion = 0.992, statistic = 9.068
## 14)* weights = 42
## 13) Cloud3pm > 7
## 15)* weights = 10
## Confusion matrix for fit2: rows are the class predicted by the tree, columns
## are the observed RainTomorrow value. Predictions are made on the same data
## the tree was fitted to, so these are in-sample counts and will tend to look
## better than performance on unseen days.
## The dimensions are labelled so the table cannot be read the wrong way round.
table(Predicted = predict(fit2), Actual = weather$RainTomorrow)
##          Actual
## Predicted  No Yes
##       No  281  21
##       Yes  19  45
## Model based on understanding
## Second conditional inference tree: same response, with a hand-picked set of
## predictors. The formula is wrapped after `+` so each line is visibly
## incomplete on its own.
fit21 <- ctree(
  formula = RainTomorrow ~ MinTemp + MaxTemp + Rainfall + Sunshine +
    WindGustSpeed + RainToday,
  data = weather
)
plot(fit21)

print(fit21)
##
## Conditional inference tree with 5 terminal nodes
##
## Response: RainTomorrow
## Inputs: MinTemp, MaxTemp, Rainfall, Sunshine, WindGustSpeed, RainToday
## Number of observations: 366
##
## 1) Sunshine <= 6.4; criterion = 1, statistic = 54.709
## 2) WindGustSpeed <= 57; criterion = 0.987, statistic = 9.348
## 3)* weights = 90
## 2) WindGustSpeed > 57
## 4)* weights = 12
## 1) Sunshine > 6.4
## 5) WindGustSpeed <= 63; criterion = 1, statistic = 18.542
## 6) MinTemp <= 17; criterion = 0.995, statistic = 11.169
## 7)* weights = 245
## 6) MinTemp > 17
## 8)* weights = 8
## 5) WindGustSpeed > 63
## 9)* weights = 11
## Confusion matrix for fit21: rows are the class predicted by the tree,
## columns are the observed RainTomorrow value. Predictions are made on the
## same data the tree was fitted to, so these are in-sample counts.
## The dimensions are labelled so the table cannot be read the wrong way round.
table(Predicted = predict(fit21), Actual = weather$RainTomorrow)
##          Actual
## Predicted  No Yes
##       No  294  49
##       Yes   6  17
## Sketch (requires the caret package, which is not loaded in this script):
##response <- predict(fit21 ,weather)
##confusionMatrix(data=response,
## reference=weather$RainTomorrow ,
## positive='Yes')
## Check Random Forest
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
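## Some issue in the Random Forest.
## NOTE(review): randomForest() defaults to na.action = na.fail, so it stops
## when any model variable has missing values -- probably the issue seen here;
## check with colSums(is.na(weather)) and, if so, pass na.action = na.omit.
## NOTE(review): the response below is RainToday, while every other model in
## this script predicts RainTomorrow -- confirm which one is intended.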
##fit3 <- randomForest(RainToday ~ MinTemp + MaxTemp
## + Rainfall + Sunshine + WindGustSpeed
## ,data=weather)
##print(fit3)
##importance(fit3)
##varImpPlot(fit3)
##response <- predict(fit3 ,weather)
##library(e1071)
#install.packages("caret")
##library(caret)
##confusionMatrix(data=response,
## reference=weather$RainTomorrow ,
## positive='Yes')