### Carseats data is available in the ISLR package
# ISLR: Introduction to Statistical Learning book
library(ISLR)
data(package="ISLR")
carseats<-Carseats
View(carseats)
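#Optional check (not part of the original walkthrough): Urban is
#already a factor with levels "No"/"Yes", so tree() will fit a
#classification tree for it.
str(carseats$Urban)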
library(tree)  ## for fitting the decision tree
library(caret) ## for partitioning the data into train and test sets
## Loading required package: lattice
## Loading required package: ggplot2
##The following commands are used to split the
#data into training data and test data.
#createDataPartition() generates the row indices stored in
#'split'; the rows at those indices become the training data
#and all remaining rows become the test data.
#p = .7 means 70% of the entire dataset goes into the
#training data and the remaining 30% into the test data.
split<-createDataPartition(carseats$Urban,p=.7,list = F)
train<-carseats[split,]
test<-carseats[-split,]
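#Optional sanity check (illustrative, not in the original script):
#createDataPartition() stratifies on Urban, so the class proportions
#in the train and test sets should be close to those in the full data.
prop.table(table(train$Urban))
prop.table(table(test$Urban))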
#Construction of the decision tree.
#y~x: y is the predicted variable and x is one or more
#independent variable(s). If x is '.', then all
#remaining variables except y are used for
#predicting the value of y.
#The tree is fit on the training data only, so that the
#held-out test set gives an unbiased evaluation.
dtree<-tree(Urban~.,train)
plot(dtree)
text(dtree)
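#As a quick sketch (not in the original script), summary() on the
#fitted tree reports the predictors actually used in the splits, the
#number of terminal nodes and the misclassification error rate.
summary(dtree)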

##Cross-validate the decision tree; the resulting plot shows
#how the misclassification error changes with tree size,
#i.e. at which sizes accuracy improves or degrades.
cv.dtree<-cv.tree(dtree,FUN = prune.misclass)
plot(cv.dtree)
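#The best size can also be read off programmatically rather than from
#the plot: cv.tree() stores the candidate sizes in $size and their
#cross-validated misclassification counts in $dev (illustrative
#sketch; 'best.size' is not part of the original script).
best.size<-cv.dtree$size[which.min(cv.dtree$dev)]
best.size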

##Based on the above plot we can determine the tree size
#(number of terminal nodes) at which to prune the
#decision tree.
prune.dtree<-prune.misclass(dtree,best = 6)
plot(prune.dtree)
text(prune.dtree)
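#Optional check (not in the original script): summary() on the pruned
#tree should confirm it now has the requested number of terminal nodes.
summary(prune.dtree)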

#Predict the test data labels using the pruned decision
#tree.
dtree.pred<-predict(prune.dtree,test,type='class')
#We create a confusion matrix to determine false positives,
#false negatives, accuracy, sensitivity and specificity.
confusionMatrix(dtree.pred,test$Urban)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   6   1
##        Yes 29  83
##                                          
##                Accuracy : 0.7479         
##                  95% CI : (0.6601, 0.823)
##     No Information Rate : 0.7059         
##     P-Value [Acc > NIR] : 0.1833         
##                                          
##                   Kappa : 0.2081         
##  Mcnemar's Test P-Value : 8.244e-07      
##                                          
##             Sensitivity : 0.17143        
##             Specificity : 0.98810        
##          Pos Pred Value : 0.85714        
##          Neg Pred Value : 0.74107        
##              Prevalence : 0.29412        
##          Detection Rate : 0.05042        
##    Detection Prevalence : 0.05882        
##       Balanced Accuracy : 0.57976        
##                                          
##        'Positive' Class : No             
##
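#The overall accuracy reported above can also be verified directly
#(a minimal check, not part of the original output):
mean(dtree.pred==test$Urban)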