Instalar paquetes y llamar librerías
# install.packages("rpart")
library(rpart)
# install.packages("rpart.plot")
library(rpart.plot)
Importar la base de datos
# Load the house-price dataset from CSV into the `house` data frame.
# NOTE(review): this is an absolute path under one user's Windows profile, so
# the script only runs on a machine where that exact file exists.
house <- read.csv(
  file = "C:\\Users\\raulc\\OneDrive\\Escritorio\\Documentos\\HousePriceData.csv"
)
Entender la base de datos
# Print per-column summary statistics for `house`. In the output below, note
# the 7 NA's reported in the Carpet/Builtup block (column alignment is lost in
# this rendering), the negative Rainfall minimum (-110), and the House_Price
# maximum (150000000), which is far above its 3rd quartile (7200000).
summary(house)
## Observation Dist_Taxi Dist_Market Dist_Hospital
## Min. : 1.0 Min. : 146 Min. : 1666 Min. : 3227
## 1st Qu.:237.0 1st Qu.: 6477 1st Qu.: 9367 1st Qu.:11302
## Median :469.0 Median : 8228 Median :11149 Median :13189
## Mean :468.4 Mean : 8235 Mean :11022 Mean :13091
## 3rd Qu.:700.0 3rd Qu.: 9939 3rd Qu.:12675 3rd Qu.:14855
## Max. :932.0 Max. :20662 Max. :20945 Max. :23294
##
## Carpet Builtup Parking City_Category
## Min. : 775 Min. : 932 Length:905 Length:905
## 1st Qu.: 1317 1st Qu.: 1579 Class :character Class :character
## Median : 1478 Median : 1774 Mode :character Mode :character
## Mean : 1511 Mean : 1794
## 3rd Qu.: 1654 3rd Qu.: 1985
## Max. :24300 Max. :12730
## NA's :7
## Rainfall House_Price
## Min. :-110.0 Min. : 1492000
## 1st Qu.: 600.0 1st Qu.: 4623000
## Median : 780.0 Median : 5860000
## Mean : 786.9 Mean : 6083992
## 3rd Qu.: 970.0 3rd Qu.: 7200000
## Max. :1560.0 Max. :150000000
##
# Show the structure of `house`: its dimensions plus each column's type and
# first few values.
str(house)
## 'data.frame': 905 obs. of 10 variables:
## $ Observation : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Dist_Taxi : int 9796 8294 11001 8301 10510 6665 13153 5882 7495 8233 ...
## $ Dist_Market : int 5250 8186 14399 11188 12629 5142 11869 9948 11589 7067 ...
## $ Dist_Hospital: int 10703 12694 16991 12289 13921 9972 17811 13315 13370 11400 ...
## $ Carpet : int 1659 1461 1340 1451 1770 1442 1542 1261 1090 1030 ...
## $ Builtup : int 1961 1752 1609 1748 2111 1733 1858 1507 1321 1235 ...
## $ Parking : chr "Open" "Not Provided" "Not Provided" "Covered" ...
## $ City_Category: chr "CAT B" "CAT B" "CAT A" "CAT B" ...
## $ Rainfall : int 530 210 720 620 450 760 1030 1020 680 1130 ...
## $ House_Price : int 6649000 3982000 5401000 5373000 4662000 4526000 7224000 3772000 4631000 4415000 ...
# Preview the first rows of `house` (head() shows six rows by default).
head(house)
## Observation Dist_Taxi Dist_Market Dist_Hospital Carpet Builtup Parking
## 1 1 9796 5250 10703 1659 1961 Open
## 2 2 8294 8186 12694 1461 1752 Not Provided
## 3 3 11001 14399 16991 1340 1609 Not Provided
## 4 4 8301 11188 12289 1451 1748 Covered
## 5 5 10510 12629 13921 1770 2111 Not Provided
## 6 6 6665 5142 9972 1442 1733 Open
## City_Category Rainfall House_Price
## 1 CAT B 530 6649000
## 2 CAT B 210 3982000
## 3 CAT A 720 5401000
## 4 CAT B 620 5373000
## 5 CAT B 450 4662000
## 6 CAT B 760 4526000
Crear árbol de decisión
# Prepare the data for modelling: exclude row 348 and convert the two text
# columns to factors, then re-inspect the structure.
# NOTE(review): no reason is given for dropping row 348; presumably it is the
# record behind the extreme House_Price maximum seen in summary() - confirm.
house <- transform(
  house[-348, ],
  Parking = as.factor(Parking),
  City_Category = as.factor(City_Category)
)
str(house)
## 'data.frame': 904 obs. of 10 variables:
## $ Observation : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Dist_Taxi : int 9796 8294 11001 8301 10510 6665 13153 5882 7495 8233 ...
## $ Dist_Market : int 5250 8186 14399 11188 12629 5142 11869 9948 11589 7067 ...
## $ Dist_Hospital: int 10703 12694 16991 12289 13921 9972 17811 13315 13370 11400 ...
## $ Carpet : int 1659 1461 1340 1451 1770 1442 1542 1261 1090 1030 ...
## $ Builtup : int 1961 1752 1609 1748 2111 1733 1858 1507 1321 1235 ...
## $ Parking : Factor w/ 4 levels "Covered","No Parking",..: 4 3 3 1 3 4 2 4 3 4 ...
## $ City_Category: Factor w/ 3 levels "CAT A","CAT B",..: 2 2 1 2 2 2 1 3 2 3 ...
## $ Rainfall : int 530 210 720 620 450 760 1030 1020 680 1130 ...
## $ House_Price : int 6649000 3982000 5401000 5373000 4662000 4526000 7224000 3772000 4631000 4415000 ...
# Fit a regression tree for House_Price using the remaining columns as
# predictors. `Observation` is excluded: it is only a record number
# (1, 2, 3, ...), so a split on it would encode file order rather than any
# property of the house.
arbol_house <- rpart(House_Price ~ . - Observation, data = house)
# Penalise scientific notation so large prices are printed in fixed notation
# wherever the printing code honours `scipen`.
options(scipen = 999)
# Draw the fitted tree.
rpart.plot(arbol_house)

Conclusiones
LS0tDQp0aXRsZTogIkhvdXNlIFByaWNpbmciDQphdXRob3I6ICJSYXVsIENhbnR1IC0gQTAxMDg3NjgzIg0KZGF0ZTogIjIwMjUtMDgtMjAiDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUNCiAgICB0aGVtZTogY29zbW8NCi0tLQ0KDQo8Y2VudGVyPg0KIVtdKGh0dHBzOi8vc3RhdGljLndpa2lhLm5vY29va2llLm5ldC9qdW1hbmppL2ltYWdlcy81LzVlL0p1bWFuamktZWFydGhxdWFrZS5qcGcvcmV2aXNpb24vbGF0ZXN0P2NiPTIwMTgwMjEyMjExMDU3KQ0KPC9jZW50ZXI+DQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWUiPiBJbnN0YWxhciBwYXF1ZXRlcyB5IGxsYW1hciBsaWJyZXLDrWFzIDwvc3Bhbj4NCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQojIGluc3RhbGwucGFja2FnZXMoInJwYXJ0IikNCmxpYnJhcnkocnBhcnQpDQojIGluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQ0KbGlicmFyeShycGFydC5wbG90KQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWUiPiBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zIDwvc3Bhbj4NCmBgYHtyfQ0KaG91c2UgPC0gcmVhZC5jc3YoIkM6XFxVc2Vyc1xccmF1bGNcXE9uZURyaXZlXFxFc2NyaXRvcmlvXFxEb2N1bWVudG9zXFxIb3VzZVByaWNlRGF0YS5jc3YiKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWUiPiBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zIDwvc3Bhbj4NCmBgYHtyfQ0Kc3VtbWFyeShob3VzZSkNCnN0cihob3VzZSkNCmhlYWQoaG91c2UpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZSI+IENyZWFyIMOhcmJvbCBkZSBkZWNpc2nDs24gPC9zcGFuPg0KYGBge3J9DQpob3VzZSA8LSBob3VzZVstMzQ4LCBdDQpob3VzZSRQYXJraW5nIDwtIGFzLmZhY3Rvcihob3VzZSRQYXJraW5nKQ0KaG91c2UkQ2l0eV9DYXRlZ29yeSA8LSBhcy5mYWN0b3IoaG91c2UkQ2l0eV9DYXRlZ29yeSkNCnN0cihob3VzZSkNCmFyYm9sX2hvdXNlIDwtIHJwYXJ0KEhvdXNlX1ByaWNlfi4sIGRhdGE9aG91c2UpDQpvcHRpb25zKHNjaXBlbiA9IDk5OSkNCnJwYXJ0LnBsb3QoYXJib2xfaG91c2UpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZSI+IENvbmNsdXNpb25lcyA8L3NwYW4+DQogIA0KDQoNCg==