# Goal: identify which attributes are associated with high sales.

# Install only the packages that are not yet available, instead of
# re-downloading all of them on every run. `requireNamespace()` returns FALSE
# (rather than raising an error) when a package is missing, which makes it a
# safe probe here.
required_pkgs <- c("rmarkdown", "C50", "tree", "caret", "gmodels", "party")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  # Use the HTTPS mirror so packages are not fetched over plain HTTP.
  install.packages(missing_pkgs, repos = "https://cran.us.r-project.org")
}
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(C50)
library(tree)
library(gmodels)

# Load the data set picked interactively by the user. `stringsAsFactors = TRUE`
# keeps the categorical columns as factors on R >= 4.0 (where the default
# changed to FALSE); the tree models fitted later need factors, not character
# vectors.
CompanyData <- read.csv(file.choose(), stringsAsFactors = TRUE)

# Inspect the distribution of Sales, which the High label is derived from.
hist(CompanyData$Sales)

# Binary response: "No" when Sales < 10, otherwise "Yes". Built explicitly as a
# factor with fixed levels so that it is a class label on every R version.
High <- factor(
  ifelse(CompanyData$Sales < 10, "No", "Yes"),
  levels = c("No", "Yes")
)
CD <- data.frame(CompanyData, High)
# CD <- CompanyData[, 2:12]
# View(CD)

# Split the data into training and testing sets by row position:
# rows 1-200 for training, rows 201-400 for testing.
# NOTE(review): this is a positional split, not a random one, so the results
# depend on the row order of the input file.
CD_train <- CD[1:200, ]
# View(CD_train)

CD_test <- CD[201:400, ]
# View(CD_test)

# Conditional inference tree (party::ctree) for High, fitted on the training
# rows only. Sales is not among the predictors (presumably because High is
# derived directly from it).
op_tree <- ctree(
  formula = High ~ CompPrice + Income + Advertising + Population + Price +
    ShelveLoc + Age + Education + Urban + US,
  data = CD_train
)
summary(op_tree)
##     Length      Class       Mode 
##          1 BinaryTree         S4
plot(op_tree)

# Reading the tree above: if the shelf location (ShelveLoc) is Good, there is
# about a 60% chance of high sales.
# With ShelveLoc Bad or Medium and Price <= 87, the probability of high sales
# is also about 60%.
# If ShelveLoc is Bad or Medium, with Price > 87 and Advertising <= 7, there
# is a zero percent chance of high sales.
# If ShelveLoc is Bad or Medium, with Price > 87 and Advertising > 7, there
# is a 20% chance of high sales.
# Predict the class ("No"/"Yes") of each test row with the conditional
# inference tree.
pred_test_df <- predict(op_tree, newdata = CD_test)

# Test-set accuracy: compare against the labels of the test rows only.
# Comparing with CD$High (all 400 rows) would recycle the 200 predictions and
# score half of them against the training labels.
mean(pred_test_df == CD_test$High) # Accuracy = 79.5%
## [1] 0.795
CrossTable(CD_test$High, pred_test_df)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  200 
## 
##  
##              | pred_test_df 
## CD_test$High |        No |       Yes | Row Total | 
## -------------|-----------|-----------|-----------|
##           No |       131 |        31 |       162 | 
##              |     2.468 |     5.899 |           | 
##              |     0.809 |     0.191 |     0.810 | 
##              |     0.929 |     0.525 |           | 
##              |     0.655 |     0.155 |           | 
## -------------|-----------|-----------|-----------|
##          Yes |        10 |        28 |        38 | 
##              |    10.523 |    25.148 |           | 
##              |     0.263 |     0.737 |     0.190 | 
##              |     0.071 |     0.475 |           | 
##              |     0.050 |     0.140 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       141 |        59 |       200 | 
##              |     0.705 |     0.295 |           | 
## -------------|-----------|-----------|-----------|
## 
## 
# caret::confusionMatrix() takes the predicted classes first (`data`) and the
# observed classes second (`reference`). Passing them the other way round
# transposes the table, swaps sensitivity/specificity with the predictive
# values and reports the wrong no-information rate.
# The output below was recomputed by hand from the counts in the CrossTable
# above; re-run to refresh it (in particular the "P-Value [Acc > NIR]" line,
# which cannot be derived by hand).
confusionMatrix(data = pred_test_df, reference = CD_test$High)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  131  10
##        Yes  31  28
##                                           
##                Accuracy : 0.795           
##                  95% CI : (0.7323, 0.8487)
##     No Information Rate : 0.81            
##     P-Value [Acc > NIR] : (re-run)        
##                                           
##                   Kappa : 0.4503          
##  Mcnemar's Test P-Value : 0.001787        
##                                           
##             Sensitivity : 0.8086          
##             Specificity : 0.7368          
##          Pos Pred Value : 0.9291          
##          Neg Pred Value : 0.4746          
##              Prevalence : 0.8100          
##          Detection Rate : 0.6550          
##    Detection Prevalence : 0.7050          
##       Balanced Accuracy : 0.7727          
##                                           
##        'Positive' Class : No              
## 
##### Classification tree (tree package) fitted on the full data set
# Every column except Sales is offered as a candidate predictor of High.
cd_tree_org <- tree(formula = High ~ . - Sales, data = CD)
summary(cd_tree_org)
## 
## Classification tree:
## tree(formula = High ~ . - Sales, data = CD)
## Variables actually used in tree construction:
## [1] "ShelveLoc"   "Price"       "Advertising" "Age"         "CompPrice"  
## [6] "Population"  "Income"     
## Number of terminal nodes:  21 
## Residual mean deviance:  0.297 = 112.6 / 379 
## Misclassification error rate: 0.0725 = 29 / 400
# Draw the tree, then label the splits; pretty = 0 prints the factor level
# names in full instead of abbreviating them.
plot(cd_tree_org)
text(cd_tree_org, pretty = 0)

##### Classification tree (tree package) fitted on the training rows only
# Same model specification as the full-data tree, restricted to CD_train so
# that CD_test can be used for evaluation.
cd_tree <- tree(formula = High ~ . - Sales, data = CD_train)
summary(cd_tree)
## 
## Classification tree:
## tree(formula = High ~ . - Sales, data = CD_train)
## Variables actually used in tree construction:
## [1] "ShelveLoc"   "Price"       "Advertising" "Age"         "CompPrice"  
## [6] "Income"     
## Number of terminal nodes:  12 
## Residual mean deviance:  0.2927 = 55.02 / 188 
## Misclassification error rate: 0.08 = 16 / 200
# Draw the tree, then label the splits with unabbreviated factor levels.
plot(cd_tree)
text(cd_tree, pretty = 0)

### Evaluate the model

# Class probabilities for the test rows: one row per observation and one
# column per class ("No", "Yes").
pred_test_df <- predict(cd_tree, newdata = CD_test)
pred_tree <- as.data.frame(pred_test_df)

# Predicted class = the column holding the highest probability. The factor
# levels are taken from the probability columns so that both classes are
# always present, even if the model never predicts one of them.
pred_tree$final <- factor(
  colnames(pred_test_df)[apply(pred_test_df, 1, which.max)],
  levels = colnames(pred_test_df)
)
summary(pred_tree$final)
##  No Yes 
## 172  28
summary(CD_test$High)
##  No Yes 
## 162  38

# Test-set accuracy: compare against the labels of the test rows only.
# Comparing with CD$High (all 400 rows) would recycle the 200 predictions and
# score half of them against the training labels.
mean(pred_tree$final == CD_test$High) # Accuracy = 86%
## [1] 0.86
CrossTable(CD_test$High, pred_tree$final)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  200 
## 
##  
##              | pred_tree$final 
## CD_test$High |        No |       Yes | Row Total | 
## -------------|-----------|-----------|-----------|
##           No |       153 |         9 |       162 | 
##              |     1.343 |     8.251 |           | 
##              |     0.944 |     0.056 |     0.810 | 
##              |     0.890 |     0.321 |           | 
##              |     0.765 |     0.045 |           | 
## -------------|-----------|-----------|-----------|
##          Yes |        19 |        19 |        38 | 
##              |     5.727 |    35.177 |           | 
##              |     0.500 |     0.500 |     0.190 | 
##              |     0.110 |     0.679 |           | 
##              |     0.095 |     0.095 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       172 |        28 |       200 | 
##              |     0.860 |     0.140 |           | 
## -------------|-----------|-----------|-----------|
## 
## 
# caret::confusionMatrix() takes the predicted classes first (`data`) and the
# observed classes second (`reference`). Passing them the other way round
# transposes the table, swaps sensitivity/specificity with the predictive
# values and reports the wrong no-information rate.
# The output below was recomputed by hand from the counts in the CrossTable
# above; re-run to refresh it (in particular the "P-Value [Acc > NIR]" line,
# which cannot be derived by hand).
confusionMatrix(data = pred_tree$final, reference = CD_test$High)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  153  19
##        Yes   9  19
##                                           
##                Accuracy : 0.86            
##                  95% CI : (0.8041, 0.9049)
##     No Information Rate : 0.81            
##     P-Value [Acc > NIR] : (re-run)        
##                                           
##                   Kappa : 0.4942          
##  Mcnemar's Test P-Value : 0.08897         
##                                           
##             Sensitivity : 0.9444          
##             Specificity : 0.5000          
##          Pos Pred Value : 0.8895          
##          Neg Pred Value : 0.6786          
##              Prevalence : 0.8100          
##          Detection Rate : 0.7650          
##    Detection Prevalence : 0.8600          
##       Balanced Accuracy : 0.7222          
##                                           
##        'Positive' Class : No              
##