library(tree)
## Warning: package 'tree' was built under R version 4.3.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the Wine data set; the Customer_Segment column is the response used below.
# NOTE(review): this is an absolute, machine-specific path -- consider a
# project-relative path so the script runs on other machines.
data <- read.csv('C:/Users/rusoc/OneDrive/Escritorio/TEC/Mineria de datos/Wine.csv')

# Fix the RNG seed so the train/test split is identical on every run.
# The captured output in this file came from an unseeded run and will differ.
set.seed(1)

# Sample 70% of the row indices, without replacement, as the training set.
# seq_len() is safe for zero rows, and floor() makes explicit the truncation
# that sample() would otherwise apply silently to a fractional size.
train <- sample(seq_len(nrow(data)), floor(nrow(data) * 0.7), replace = FALSE)

# Show the distinct values taken by the response.
unique(data$Customer_Segment)
## [1] 1 2 3
# Fit a classification tree on the training rows, using every other column as
# a predictor.
# Customer_Segment holds class labels (1, 2, 3). Left numeric, tree() fits a
# regression tree and predicts fractional "segments" such as 1.6; wrapping the
# response in factor() makes tree() fit a classification tree instead.
# The column is named directly (not data$Customer_Segment) so the formula is
# resolved inside `data`.
# NOTE: the captured output below was produced by the earlier regression fit
# and needs to be regenerated.
data.tree <- tree(factor(Customer_Segment) ~ ., data = data, subset = train)
summary(data.tree)
##
## Regression tree:
## tree(formula = data$Customer_Segment ~ ., data = data, subset = train)
## Number of terminal nodes: 8
## Residual mean deviance: 0.1618 = 18.77 / 116
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.50000 -0.03333 -0.02632 0.00000 0.00000 1.40000
# Draw the fitted tree, then add the node labels on top of the plot.
plot(data.tree)
text(data.tree, pretty = 0)

# Print the fitted tree's node-by-node description.
data.tree
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 124 72.6000 1.944
## 2) Malic_Acid < 2.275 78 26.6800 1.603
## 4) Alcohol < 13.155 43 4.9770 1.977
## 8) Alcohol < 13.04 38 0.9737 2.026 *
## 9) Alcohol > 13.04 5 3.2000 1.600 *
## 5) Alcohol > 13.155 35 8.2860 1.143
## 10) Color_Intensity < 7.65 30 0.9667 1.033 *
## 11) Color_Intensity > 7.65 5 4.8000 1.800 *
## 3) Malic_Acid > 2.275 46 21.4800 2.522
## 6) Color_Intensity < 5.25 23 10.9600 2.043
## 12) Alcohol < 13.2 17 4.4710 2.176
## 24) Color_Intensity < 3.825 11 0.0000 2.000 *
## 25) Color_Intensity > 3.825 6 3.5000 2.500 *
## 13) Alcohol > 13.2 6 5.3330 1.667 *
## 7) Color_Intensity > 5.25 23 0.0000 3.000 *
# Hold-out rows: every row index that was not sampled into `train`.
# seq_len(nrow(data)) replaces seq(length(...)), which would yield c(1, 0)
# instead of an empty index for an empty data set.
test <- setdiff(seq_len(nrow(data)), train)

# Predict the hold-out rows with the fitted tree and summarise the predictions.
tree.pred <- predict(data.tree, newdata = data[test, ])
summary(tree.pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.033 1.033 2.026 1.922 2.026 3.000