# Dependencies ----
# Install only the packages that are not already available. Calling
# install.packages() unconditionally on every run is slow, requires network
# access, and reinstalls packages that are already present.
required_pkgs <- c("rpart", "rpart.plot", "readr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach the packages used by the rest of the script.
library(rpart)
library(rpart.plot)
library(readr)
# Data loading and train/test split ----
# Read the raw data. A previous run reported 465 rows and 16 columns:
# one character column (PLAYER) and 15 numeric columns
# (YRS, G, AB, R, H, 2B, 3B, HR, RBI, BB, SO, SB, CS, BA, HOF).
df <- read_csv("500hits.csv")

# Drop the PLAYER column so it is not used as a predictor, and turn the
# target HOF into a factor so it can be modelled as a class label.
df_model <- df[, setdiff(names(df), "PLAYER")]
df_model$HOF <- as.factor(df_model$HOF)

# Reproducible 80/20 split. The index vector is deliberately not called
# `sample`, which would mask base::sample().
set.seed(123)
n_rows <- nrow(df_model)
train_idx <- sample.int(n = n_rows, size = floor(0.8 * n_rows), replace = FALSE)
train <- df_model[train_idx, ]
# Select the complement with positive indices: `df_model[-train_idx, ]` would
# return zero rows (instead of every row) if train_idx were empty.
test <- df_model[setdiff(seq_len(n_rows), train_idx), ]
# Classification trees ----
# Fit three trees on `train` and compare their accuracy (share of correctly
# classified rows) on the held-out `test` set.

# Tree 1: rpart with its default control settings.
arbol_1 <- rpart(HOF ~ ., data = train, method = "class")
pred_default <- predict(arbol_1, test, type = "class")
acc_default <- mean(pred_default == test$HOF)

# Tree 2: depth limited to 4, using the "information" splitting index.
arbol_2 <- rpart(
  HOF ~ .,
  data = train,
  method = "class",
  control = rpart.control(maxdepth = 4),
  parms = list(split = "information")
)
# Score arbol_2 itself; this line previously predicted with arbol_1.
pred_limited <- predict(arbol_2, test, type = "class")
acc_limited <- mean(pred_limited == test$HOF)

# Tree 3: depth limited to 6 with at least 5 observations per leaf,
# using the "information" splitting index.
arbol_3 <- rpart(
  HOF ~ .,
  data = train,
  method = "class",
  control = rpart.control(maxdepth = 6, minbucket = 5),
  parms = list(split = "information")
)
# Score arbol_3 itself; this line previously predicted with arbol_2, so the
# third tree was never evaluated.
pred_tuned <- predict(arbol_3, test, type = "class")
acc_tuned <- mean(pred_tuned == test$HOF)

# Report the test-set accuracy of each tree.
# NOTE: the output previously recorded here showed 0.785 for all three trees,
# but those figures came from scoring arbol_1, arbol_1 and arbol_2
# respectively. Re-run the script to obtain the real figure for arbol_3.
print(paste("Precisión Árbol 1: ", round(acc_default, 3)))
print(paste("Precisión Árbol 2: ", round(acc_limited, 3)))
print(paste("Precisión Árbol 3: ", round(acc_tuned, 3)))

# Plot the third tree; `type` and `extra` control where split labels are drawn
# and what each node displays (see the rpart.plot documentation).
rpart.plot(arbol_3, main = "Un Mejor Árbol", type = 2, extra = 104)