CART

# Experimento simulado: Efecto de Riego (2 niveles), Fertilización (2 niveles) y Genotipo (3)
# Especie elegida: "Trigo" 


library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors

library(car)        # LeveneTest

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.2.3

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(emmeans)    # medias marginales y comparaciones

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

library(rpart)      # CART

## Warning: package 'rpart' was built under R version 4.2.3

library(rpart.plot) # para graficar árboles

set.seed(186) 

# Factores:
# - Riego: "Presente", "Ausente"
# - Fertilizacion: "Si", "No"
# - Genotipo: "G1","G2","G3"
# Reps por combinación:
reps <- 5

niveles_riego <- c("Presente", "Ausente")
niveles_fert <- c("Si", "No")
genotipos <- c("G1","G2","G3")

design <- expand.grid(Riego = niveles_riego,
                      Fert = niveles_fert,
                      Genotipo = genotipos,
                      Rep = 1:reps,
                      stringsAsFactors = FALSE)

# Parámetros:
# línea base (intercepto)
mu0 <- 50

#efectos
ef_riego <- c(Presente = rnorm(1, 5, 2), Ausente = 0)
ef_fert <- c(Si = rnorm(1, 4, 2), No = 0)
ef_gen <- c(G1 = 0, G2 = rnorm(1, 2, 1), G3 = rnorm(1, 3, 1))
sd_error <- 4


# interacción inventada 
# por ejemplo: G3 responde especialmente al riego
int_rg <- matrix(0, nrow = length(genotipos), ncol = length(niveles_riego),
                 dimnames = list(genotipos, niveles_riego))
int_rg["G3", "Presente"] <- 3  # G3 gana extra con riego

# ruido residual
sd_error <- 4

# generar variable respuesta 'Rendimiento'
design <- design %>%
  mutate(
    # efecto sumando intercept + efectos principales + interacción según combinación
    mu = mu0 +
         ef_riego[Riego] +
         ef_fert[Fert] +
         ef_gen[Genotipo] +
         mapply(function(g,r) int_rg[g,r], Genotipo, Riego),
    Rendimiento = mu + rnorm(n = n(), mean = 0, sd = sd_error)
  )

# convertir a factores
design <- design %>%
  mutate(Riego = factor(Riego, levels = c("Ausente","Presente")),
         Fert = factor(Fert, levels = c("No","Si")),
         Genotipo = factor(Genotipo, levels = c("G1","G2","G3")))

# ver primeras filas
head(design)

##      Riego Fert Genotipo Rep       mu Rendimiento
## 1 Presente   Si       G1   1 56.00293    61.46037
## 2  Ausente   Si       G1   1 53.36504    50.63815
## 3 Presente   No       G1   1 52.63788    48.82017
## 4  Ausente   No       G1   1 50.00000    48.28607
## 5 Presente   Si       G2   1 58.95895    59.36874
## 6  Ausente   Si       G2   1 56.32107    58.16799

# Modelo factorial completo con interacciones
modelo <- aov(Rendimiento ~ Riego * Fert * Genotipo, data = design)
summary(modelo)

##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Riego                1  382.9   382.9  26.208 5.36e-06 ***
## Fert                 1  149.0   149.0  10.197  0.00248 ** 
## Genotipo             2  123.7    61.9   4.234  0.02025 *  
## Riego:Fert           1    0.8     0.8   0.053  0.81916    
## Riego:Genotipo       2   29.3    14.6   1.001  0.37496    
## Fert:Genotipo        2   22.4    11.2   0.766  0.47031    
## Riego:Fert:Genotipo  2   97.6    48.8   3.340  0.04386 *  
## Residuals           48  701.4    14.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Comprobaciones de supuestos
# 1) Normalidad de residuos (Shapiro-Wilk)
shapiro.test(residuals(modelo))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(modelo)
## W = 0.98526, p-value = 0.6837

# 2) Homogeneidad de varianzas (Levene)
leveneTest(Rendimiento ~ interaction(Riego, Fert, Genotipo), data = design)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group 11  0.4484 0.9251
##       48

# Medias marginales (emmeans) y comparaciones si quieres
emm <- emmeans(modelo, ~ Riego * Fert * Genotipo)
# medias por combinación
summary(emm)

##  Riego    Fert Genotipo emmean   SE df lower.CL upper.CL
##  Ausente  No   G1         50.6 1.71 48     47.2     54.1
##  Presente No   G1         54.0 1.71 48     50.6     57.5
##  Ausente  Si   G1         51.0 1.71 48     47.5     54.4
##  Presente Si   G1         60.1 1.71 48     56.7     63.6
##  Ausente  No   G2         50.1 1.71 48     46.7     53.5
##  Presente No   G2         59.1 1.71 48     55.7     62.5
##  Ausente  Si   G2         57.9 1.71 48     54.5     61.4
##  Presente Si   G2         60.5 1.71 48     57.1     63.9
##  Ausente  No   G3         55.2 1.71 48     51.8     58.6
##  Presente No   G3         57.3 1.71 48     53.8     60.7
##  Ausente  Si   G3         55.8 1.71 48     52.4     59.2
##  Presente Si   G3         59.9 1.71 48     56.5     63.4
## 
## Confidence level used: 0.95

# Comparaciones múltiples (Tukey) entre combinaciones
# (opcional, puede generar muchas comparaciones)
pairs(emm, adjust = "tukey")

##  contrast                        estimate   SE df t.ratio p.value
##  Ausente No G1 - Presente No G1    -3.427 2.42 48  -1.417  0.9542
##  Ausente No G1 - Ausente Si G1     -0.351 2.42 48  -0.145  1.0000
##  Ausente No G1 - Presente Si G1    -9.502 2.42 48  -3.930  0.0129
##  Ausente No G1 - Ausente No G2      0.511 2.42 48   0.211  1.0000
##  Ausente No G1 - Presente No G2    -8.482 2.42 48  -3.508  0.0412
##  Ausente No G1 - Ausente Si G2     -7.330 2.42 48  -3.032  0.1308
##  Ausente No G1 - Presente Si G2    -9.874 2.42 48  -4.084  0.0082
##  Ausente No G1 - Ausente No G3     -4.592 2.42 48  -1.899  0.7536
##  Ausente No G1 - Presente No G3    -6.650 2.42 48  -2.751  0.2339
##  Ausente No G1 - Ausente Si G3     -5.175 2.42 48  -2.141  0.5978
##  Ausente No G1 - Presente Si G3    -9.318 2.42 48  -3.854  0.0160
##  Presente No G1 - Ausente Si G1     3.076 2.42 48   1.272  0.9788
##  Presente No G1 - Presente Si G1   -6.076 2.42 48  -2.513  0.3568
##  Presente No G1 - Ausente No G2     3.937 2.42 48   1.629  0.8897
##  Presente No G1 - Presente No G2   -5.055 2.42 48  -2.091  0.6311
##  Presente No G1 - Ausente Si G2    -3.903 2.42 48  -1.615  0.8952
##  Presente No G1 - Presente Si G2   -6.448 2.42 48  -2.667  0.2735
##  Presente No G1 - Ausente No G3    -1.166 2.42 48  -0.482  1.0000
##  Presente No G1 - Presente No G3   -3.224 2.42 48  -1.333  0.9701
##  Presente No G1 - Ausente Si G3    -1.749 2.42 48  -0.723  0.9998
##  Presente No G1 - Presente Si G3   -5.891 2.42 48  -2.437  0.4026
##  Ausente Si G1 - Presente Si G1    -9.151 2.42 48  -3.785  0.0195
##  Ausente Si G1 - Ausente No G2      0.861 2.42 48   0.356  1.0000
##  Ausente Si G1 - Presente No G2    -8.131 2.42 48  -3.363  0.0598
##  Ausente Si G1 - Ausente Si G2     -6.979 2.42 48  -2.887  0.1784
##  Ausente Si G1 - Presente Si G2    -9.524 2.42 48  -3.939  0.0126
##  Ausente Si G1 - Ausente No G3     -4.241 2.42 48  -1.754  0.8335
##  Ausente Si G1 - Presente No G3    -6.300 2.42 48  -2.606  0.3051
##  Ausente Si G1 - Ausente Si G3     -4.824 2.42 48  -1.996  0.6939
##  Ausente Si G1 - Presente Si G3    -8.967 2.42 48  -3.709  0.0240
##  Presente Si G1 - Ausente No G2    10.013 2.42 48   4.142  0.0069
##  Presente Si G1 - Presente No G2    1.020 2.42 48   0.422  1.0000
##  Presente Si G1 - Ausente Si G2     2.172 2.42 48   0.899  0.9988
##  Presente Si G1 - Presente Si G2   -0.372 2.42 48  -0.154  1.0000
##  Presente Si G1 - Ausente No G3     4.910 2.42 48   2.031  0.6709
##  Presente Si G1 - Presente No G3    2.852 2.42 48   1.180  0.9882
##  Presente Si G1 - Ausente Si G3     4.327 2.42 48   1.790  0.8153
##  Presente Si G1 - Presente Si G3    0.184 2.42 48   0.076  1.0000
##  Ausente No G2 - Presente No G2    -8.993 2.42 48  -3.720  0.0234
##  Ausente No G2 - Ausente Si G2     -7.840 2.42 48  -3.243  0.0803
##  Ausente No G2 - Presente Si G2   -10.385 2.42 48  -4.296  0.0043
##  Ausente No G2 - Ausente No G3     -5.103 2.42 48  -2.111  0.6180
##  Ausente No G2 - Presente No G3    -7.161 2.42 48  -2.962  0.1523
##  Ausente No G2 - Ausente Si G3     -5.686 2.42 48  -2.352  0.4565
##  Ausente No G2 - Presente Si G3    -9.828 2.42 48  -4.065  0.0087
##  Presente No G2 - Ausente Si G2     1.152 2.42 48   0.477  1.0000
##  Presente No G2 - Presente Si G2   -1.392 2.42 48  -0.576  1.0000
##  Presente No G2 - Ausente No G3     3.890 2.42 48   1.609  0.8973
##  Presente No G2 - Presente No G3    1.832 2.42 48   0.758  0.9998
##  Presente No G2 - Ausente Si G3     3.307 2.42 48   1.368  0.9642
##  Presente No G2 - Presente Si G3   -0.836 2.42 48  -0.346  1.0000
##  Ausente Si G2 - Presente Si G2    -2.545 2.42 48  -1.053  0.9953
##  Ausente Si G2 - Ausente No G3      2.738 2.42 48   1.132  0.9915
##  Ausente Si G2 - Presente No G3     0.679 2.42 48   0.281  1.0000
##  Ausente Si G2 - Ausente Si G3      2.155 2.42 48   0.891  0.9989
##  Ausente Si G2 - Presente Si G3    -1.988 2.42 48  -0.822  0.9995
##  Presente Si G2 - Ausente No G3     5.282 2.42 48   2.185  0.5678
##  Presente Si G2 - Presente No G3    3.224 2.42 48   1.334  0.9701
##  Presente Si G2 - Ausente Si G3     4.699 2.42 48   1.944  0.7266
##  Presente Si G2 - Presente Si G3    0.557 2.42 48   0.230  1.0000
##  Ausente No G3 - Presente No G3    -2.058 2.42 48  -0.851  0.9993
##  Ausente No G3 - Ausente Si G3     -0.583 2.42 48  -0.241  1.0000
##  Ausente No G3 - Presente Si G3    -4.726 2.42 48  -1.955  0.7198
##  Presente No G3 - Ausente Si G3     1.475 2.42 48   0.610  1.0000
##  Presente No G3 - Presente Si G3   -2.667 2.42 48  -1.103  0.9931
##  Ausente Si G3 - Presente Si G3    -4.143 2.42 48  -1.714  0.8531
## 
## P value adjustment: tukey method for comparing a family of 12 estimates

# 4) Árbol CART (rpart) para predecir Rendimiento y encontrar reglas
# usar rpart con todos los predictores categóricos
tree_model <- rpart(Rendimiento ~ Riego + Fert + Genotipo,
                    data = design,
                    method = "anova",
                    control = rpart.control(cp = 0.001)) 

# resumen y gráfica
print(tree_model)

## n= 60 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 60 1507.0500 55.96721  
##    2) Riego=Ausente 30  741.2155 53.44087  
##      4) Genotipo=G1 10  159.3879 50.79342 *
##      5) Genotipo=G2,G3 20  476.6922 54.76460  
##       10) Fert=No 10  186.4535 52.65875 *
##       11) Fert=Si 10  201.5461 56.87046 *
##    3) Riego=Presente 30  382.8928 58.49354  
##      6) Fert=No 15  194.1797 56.80429 *
##      7) Fert=Si 15  103.1062 60.18279 *

rpart.plot(tree_model, roundint = FALSE, box.palette = "RdYlGn", main = "Árbol CART - Rendimiento")

# Predicciones en los datos
design$pred_tree <- predict(tree_model, newdata = design)

# 5) Identificar mejor y peor combinacion (por medias observadas y por predicción del árbol)

# Medias observadas por combinación
medias <- design %>%
  group_by(Riego, Fert, Genotipo) %>%
  summarise(
    n = n(),
    mean_Rend = mean(Rendimiento),
    sd_Rend = sd(Rendimiento),
    mean_pred_tree = mean(pred_tree)
  ) %>%
  arrange(desc(mean_Rend)) %>%
  ungroup()

## `summarise()` has grouped output by 'Riego', 'Fert'. You can override using the
## `.groups` argument.

medias

## # A tibble: 12 × 7
##    Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##    <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
##  1 Presente Si    G2           5      60.5    2.87           60.2
##  2 Presente Si    G1           5      60.1    3.52           60.2
##  3 Presente Si    G3           5      59.9    2.22           60.2
##  4 Presente No    G2           5      59.1    3.47           56.8
##  5 Ausente  Si    G2           5      57.9    5.51           56.9
##  6 Presente No    G3           5      57.3    2.33           56.8
##  7 Ausente  Si    G3           5      55.8    4.13           56.9
##  8 Ausente  No    G3           5      55.2    3.96           52.7
##  9 Presente No    G1           5      54.0    3.83           56.8
## 10 Ausente  Si    G1           5      51.0    3.22           50.8
## 11 Ausente  No    G1           5      50.6    5.42           50.8
## 12 Ausente  No    G2           5      50.1    3.83           52.7

# Mejor combinación (según media observada)
mejor_obs <- medias %>% slice(1)
peor_obs  <- medias %>% slice(n())

# Mejor/peor según predicción del árbol
mejor_tree <- medias %>% arrange(desc(mean_pred_tree)) %>% slice(1)
peor_tree  <- medias %>% arrange(mean_pred_tree) %>% slice(1)

# Mostrar resultados resumidos
list(
  mejor_observado = mejor_obs,
  peor_observado = peor_obs,
  mejor_seg_un_arbol = mejor_tree,
  peor_seg_un_arbol = peor_tree
)

## $mejor_observado
## # A tibble: 1 × 7
##   Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Presente Si    G2           5      60.5    2.87           60.2
## 
## $peor_observado
## # A tibble: 1 × 7
##   Riego   Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>   <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Ausente No    G2           5      50.1    3.83           52.7
## 
## $mejor_seg_un_arbol
## # A tibble: 1 × 7
##   Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Presente Si    G2           5      60.5    2.87           60.2
## 
## $peor_seg_un_arbol
## # A tibble: 1 × 7
##   Riego   Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>   <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Ausente Si    G1           5      51.0    3.22           50.8

# 6) Resumen interpretativo (impreso)

cat("\n--- Resumen rápido ---\n")

## 
## --- Resumen rápido ---

cat("Factor niveles:\n")

## Factor niveles:

cat(" Riego:", levels(design$Riego), "\n")

##  Riego: Ausente Presente

cat(" Fert:", levels(design$Fert), "\n")

##  Fert: No Si

cat(" Genotipos:", levels(design$Genotipo), "\n\n")

##  Genotipos: G1 G2 G3

cat("Mejor combinación (media observada):\n")

## Mejor combinación (media observada):

print(mejor_obs)

## # A tibble: 1 × 7
##   Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Presente Si    G2           5      60.5    2.87           60.2

cat("\nPeor combinación (media observada):\n")

## 
## Peor combinación (media observada):

print(peor_obs)

## # A tibble: 1 × 7
##   Riego   Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>   <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Ausente No    G2           5      50.1    3.83           52.7

cat("\nMejor según árbol (predicción promedio):\n")

## 
## Mejor según árbol (predicción promedio):

print(mejor_tree)

## # A tibble: 1 × 7
##   Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Presente Si    G2           5      60.5    2.87           60.2

cat("\nPeor según árbol (predicción promedio):\n")

## 
## Peor según árbol (predicción promedio):

print(peor_tree)

## # A tibble: 1 × 7
##   Riego   Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>   <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Ausente Si    G1           5      51.0    3.22           50.8

# Para representar de una forma mas organica los resultados tenemos la siguiente grafica 

# Crear etiqueta única por combinación
medias$Tratamiento <- with(medias,
                           paste(Riego, Fert, Genotipo, sep = "_"))

# Identificar mejor y peor
best  <- medias$Tratamiento[which.max(medias$mean_Rend)]
worst <- medias$Tratamiento[which.min(medias$mean_Rend)]

# Nueva columna para colorear
medias$Categoria <- ifelse(medias$Tratamiento == best, "Mejor",
                           ifelse(medias$Tratamento == worst, "Peor", "Intermedio"))

## Warning: Unknown or uninitialised column: `Tratamento`.

# Gráfico bonito
library(ggplot2)

ggplot(medias, aes(x = reorder(Tratamiento, mean_Rend),
                   y = mean_Rend,
                   fill = Categoria)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("Mejor" = "darkgreen",
                               "Peor" = "red",
                               "Intermedio" = "gray70")) +
  labs(title = "Rendimiento Promedio por Combinación de Tratamientos",
       x = "Tratamiento (Riego_Fert_Genotipo)",
       y = "Rendimiento promedio") +
  theme_minimal(base_size = 13)

#Ahora se replicara la misma simulación cambiando la semilla

set.seed(999) 

# Factores:
# - Riego: "Presente", "Ausente"
# - Fertilizacion: "Si", "No"
# - Genotipo: "G1","G2","G3"
# Reps por combinación:
reps <- 5

niveles_riego <- c("Presente", "Ausente")
niveles_fert <- c("Si", "No")
genotipos <- c("G1","G2","G3")

design <- expand.grid(Riego = niveles_riego,
                      Fert = niveles_fert,
                      Genotipo = genotipos,
                      Rep = 1:reps,
                      stringsAsFactors = FALSE)

# Parámetros:
# línea base (intercepto)
mu0 <- 50

ef_riego <- c(Presente = rnorm(1, 5, 2), Ausente = 0)
ef_fert <- c(Si = rnorm(1, 4, 2), No = 0)
ef_gen <- c(G1 = 0, G2 = rnorm(1, 2, 1), G3 = rnorm(1, 3, 1))
sd_error <- 6

# generar variable respuesta 'Rendimiento'
design <- design %>%
  mutate(
    # efecto sumando intercept + efectos principales + interacción según combinación
    mu = mu0 +
         ef_riego[Riego] +
         ef_fert[Fert] +
         ef_gen[Genotipo] +
         mapply(function(g,r) int_rg[g,r], Genotipo, Riego),
    Rendimiento = mu + rnorm(n = n(), mean = 0, sd = sd_error)
  )

# convertir a factores
design <- design %>%
  mutate(Riego = factor(Riego, levels = c("Ausente","Presente")),
         Fert = factor(Fert, levels = c("No","Si")),
         Genotipo = factor(Genotipo, levels = c("G1","G2","G3")))

# ver primeras filas
head(design)

##      Riego Fert Genotipo Rep       mu Rendimiento
## 1 Presente   Si       G1   1 55.81140    54.14756
## 2  Ausente   Si       G1   1 51.37488    47.97874
## 3 Presente   No       G1   1 54.43652    43.16457
## 4  Ausente   No       G1   1 50.00000    42.39925
## 5 Presente   Si       G2   1 58.60658    52.80009
## 6  Ausente   Si       G2   1 54.17006    47.44401

# Modelo factorial completo con interacciones
modelo <- aov(Rendimiento ~ Riego * Fert * Genotipo, data = design)
summary(modelo)

##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Riego                1  623.4   623.4  16.086 0.000211 ***
## Fert                 1  130.0   130.0   3.353 0.073275 .  
## Genotipo             2  354.4   177.2   4.572 0.015218 *  
## Riego:Fert           1  115.1   115.1   2.971 0.091206 .  
## Riego:Genotipo       2   27.5    13.7   0.355 0.703152    
## Fert:Genotipo        2    3.3     1.6   0.042 0.958699    
## Riego:Fert:Genotipo  2   31.2    15.6   0.403 0.670597    
## Residuals           48 1860.1    38.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Comprobaciones de supuestos
# 1) Normalidad de residuos (Shapiro-Wilk)
shapiro.test(residuals(modelo))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(modelo)
## W = 0.9914, p-value = 0.9494

# 2) Homogeneidad de varianzas (Levene)
leveneTest(Rendimiento ~ interaction(Riego, Fert, Genotipo), data = design)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group 11  0.7402  0.695
##       48

# Medias marginales (emmeans) y comparaciones si quieres
emm <- emmeans(modelo, ~ Riego * Fert * Genotipo)
# medias por combinación
summary(emm)

##  Riego    Fert Genotipo emmean   SE df lower.CL upper.CL
##  Ausente  No   G1         45.7 2.78 48     40.1     51.3
##  Presente No   G1         54.5 2.78 48     48.9     60.1
##  Ausente  Si   G1         51.1 2.78 48     45.5     56.7
##  Presente Si   G1         55.2 2.78 48     49.6     60.8
##  Ausente  No   G2         50.1 2.78 48     44.5     55.7
##  Presente No   G2         56.2 2.78 48     50.6     61.8
##  Ausente  Si   G2         53.7 2.78 48     48.1     59.3
##  Presente Si   G2         57.2 2.78 48     51.6     62.8
##  Ausente  No   G3         49.4 2.78 48     43.8     55.0
##  Presente No   G3         62.3 2.78 48     56.7     67.9
##  Ausente  Si   G3         57.6 2.78 48     52.0     63.2
##  Presente Si   G3         61.0 2.78 48     55.4     66.6
## 
## Confidence level used: 0.95

# Comparaciones múltiples (Tukey) entre combinaciones
# (opcional, puede generar muchas comparaciones)
pairs(emm, adjust = "tukey")

##  contrast                        estimate   SE df t.ratio p.value
##  Ausente No G1 - Presente No G1    -8.787 3.94 48  -2.232  0.5361
##  Ausente No G1 - Ausente Si G1     -5.397 3.94 48  -1.371  0.9637
##  Ausente No G1 - Presente Si G1    -9.489 3.94 48  -2.410  0.4193
##  Ausente No G1 - Ausente No G2     -4.393 3.94 48  -1.116  0.9924
##  Ausente No G1 - Presente No G2   -10.438 3.94 48  -2.651  0.2815
##  Ausente No G1 - Ausente Si G2     -7.972 3.94 48  -2.025  0.6749
##  Ausente No G1 - Presente Si G2   -11.511 3.94 48  -2.924  0.1652
##  Ausente No G1 - Ausente No G3     -3.723 3.94 48  -0.946  0.9982
##  Ausente No G1 - Presente No G3   -16.542 3.94 48  -4.202  0.0058
##  Ausente No G1 - Ausente Si G3    -11.889 3.94 48  -3.020  0.1343
##  Ausente No G1 - Presente Si G3   -15.286 3.94 48  -3.883  0.0148
##  Presente No G1 - Ausente Si G1     3.390 3.94 48   0.861  0.9992
##  Presente No G1 - Presente Si G1   -0.702 3.94 48  -0.178  1.0000
##  Presente No G1 - Ausente No G2     4.393 3.94 48   1.116  0.9924
##  Presente No G1 - Presente No G2   -1.652 3.94 48  -0.419  1.0000
##  Presente No G1 - Ausente Si G2     0.814 3.94 48   0.207  1.0000
##  Presente No G1 - Presente Si G2   -2.724 3.94 48  -0.692  0.9999
##  Presente No G1 - Ausente No G3     5.064 3.94 48   1.286  0.9771
##  Presente No G1 - Presente No G3   -7.755 3.94 48  -1.970  0.7103
##  Presente No G1 - Ausente Si G3    -3.102 3.94 48  -0.788  0.9997
##  Presente No G1 - Presente Si G3   -6.500 3.94 48  -1.651  0.8807
##  Ausente Si G1 - Presente Si G1    -4.092 3.94 48  -1.039  0.9958
##  Ausente Si G1 - Ausente No G2      1.003 3.94 48   0.255  1.0000
##  Ausente Si G1 - Presente No G2    -5.042 3.94 48  -1.281  0.9778
##  Ausente Si G1 - Ausente Si G2     -2.576 3.94 48  -0.654  0.9999
##  Ausente Si G1 - Presente Si G2    -6.114 3.94 48  -1.553  0.9171
##  Ausente Si G1 - Ausente No G3      1.673 3.94 48   0.425  1.0000
##  Ausente Si G1 - Presente No G3   -11.146 3.94 48  -2.831  0.1999
##  Ausente Si G1 - Ausente Si G3     -6.492 3.94 48  -1.649  0.8815
##  Ausente Si G1 - Presente Si G3    -9.890 3.94 48  -2.512  0.3575
##  Presente Si G1 - Ausente No G2     5.096 3.94 48   1.294  0.9760
##  Presente Si G1 - Presente No G2   -0.950 3.94 48  -0.241  1.0000
##  Presente Si G1 - Ausente Si G2     1.516 3.94 48   0.385  1.0000
##  Presente Si G1 - Presente Si G2   -2.022 3.94 48  -0.514  1.0000
##  Presente Si G1 - Ausente No G3     5.766 3.94 48   1.464  0.9430
##  Presente Si G1 - Presente No G3   -7.053 3.94 48  -1.791  0.8144
##  Presente Si G1 - Ausente Si G3    -2.400 3.94 48  -0.610  1.0000
##  Presente Si G1 - Presente Si G3   -5.798 3.94 48  -1.473  0.9409
##  Ausente No G2 - Presente No G2    -6.045 3.94 48  -1.535  0.9228
##  Ausente No G2 - Ausente Si G2     -3.579 3.94 48  -0.909  0.9987
##  Ausente No G2 - Presente Si G2    -7.118 3.94 48  -1.808  0.8057
##  Ausente No G2 - Ausente No G3      0.670 3.94 48   0.170  1.0000
##  Ausente No G2 - Presente No G3   -12.149 3.94 48  -3.086  0.1159
##  Ausente No G2 - Ausente Si G3     -7.496 3.94 48  -1.904  0.7510
##  Ausente No G2 - Presente Si G3   -10.893 3.94 48  -2.767  0.2268
##  Presente No G2 - Ausente Si G2     2.466 3.94 48   0.626  1.0000
##  Presente No G2 - Presente Si G2   -1.073 3.94 48  -0.272  1.0000
##  Presente No G2 - Ausente No G3     6.715 3.94 48   1.706  0.8568
##  Presente No G2 - Presente No G3   -6.104 3.94 48  -1.550  0.9180
##  Presente No G2 - Ausente Si G3    -1.451 3.94 48  -0.368  1.0000
##  Presente No G2 - Presente Si G3   -4.848 3.94 48  -1.231  0.9835
##  Ausente Si G2 - Presente Si G2    -3.538 3.94 48  -0.899  0.9988
##  Ausente Si G2 - Ausente No G3      4.249 3.94 48   1.079  0.9943
##  Ausente Si G2 - Presente No G3    -8.570 3.94 48  -2.177  0.5734
##  Ausente Si G2 - Ausente Si G3     -3.917 3.94 48  -0.995  0.9971
##  Ausente Si G2 - Presente Si G3    -7.314 3.94 48  -1.858  0.7780
##  Presente Si G2 - Ausente No G3     7.788 3.94 48   1.978  0.7051
##  Presente Si G2 - Presente No G3   -5.031 3.94 48  -1.278  0.9781
##  Presente Si G2 - Ausente Si G3    -0.378 3.94 48  -0.096  1.0000
##  Presente Si G2 - Presente Si G3   -3.775 3.94 48  -0.959  0.9979
##  Ausente No G3 - Presente No G3   -12.819 3.94 48  -3.256  0.0778
##  Ausente No G3 - Ausente Si G3     -8.166 3.94 48  -2.074  0.6425
##  Ausente No G3 - Presente Si G3   -11.563 3.94 48  -2.937  0.1606
##  Presente No G3 - Ausente Si G3     4.653 3.94 48   1.182  0.9880
##  Presente No G3 - Presente Si G3    1.256 3.94 48   0.319  1.0000
##  Ausente Si G3 - Presente Si G3    -3.397 3.94 48  -0.863  0.9992
## 
## P value adjustment: tukey method for comparing a family of 12 estimates

# 4) Árbol CART (rpart) para predecir Rendimiento y encontrar reglas
# usar rpart con todos los predictores categóricos
tree_model <- rpart(Rendimiento ~ Riego + Fert + Genotipo,
                    data = design,
                    method = "anova",
                    control = rpart.control(cp = 0.001)) 

# resumen y gráfica
print(tree_model)

## n= 60 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 60 3144.8870 54.50307  
##    2) Riego=Ausente 30 1091.0030 51.27983  
##      4) Fert=No 15  528.8248 48.42293 *
##      5) Fert=Si 15  317.3209 54.13674 *
##    3) Riego=Presente 30 1430.5300 57.72630  
##      6) Genotipo=G1,G2 20  964.1839 55.77363  
##       12) Genotipo=G1 10  627.3168 54.85524 *
##       13) Genotipo=G2 10  319.9984 56.69202 *
##      7) Genotipo=G3 10  237.5697 61.63165 *

rpart.plot(tree_model, roundint = FALSE, box.palette = "RdYlGn", main = "Árbol CART - Rendimiento")

# Predicciones en los datos
design$pred_tree <- predict(tree_model, newdata = design)

# 5) Identificar mejor y peor combinacion (por medias observadas y por predicción del árbol)

# Medias observadas por combinación
medias <- design %>%
  group_by(Riego, Fert, Genotipo) %>%
  summarise(
    n = n(),
    mean_Rend = mean(Rendimiento),
    sd_Rend = sd(Rendimiento),
    mean_pred_tree = mean(pred_tree)
  ) %>%
  arrange(desc(mean_Rend)) %>%
  ungroup()

## `summarise()` has grouped output by 'Riego', 'Fert'. You can override using the
## `.groups` argument.

medias

## # A tibble: 12 × 7
##    Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##    <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
##  1 Presente No    G3           5      62.3    5.35           61.6
##  2 Presente Si    G3           5      61.0    5.46           61.6
##  3 Ausente  Si    G3           5      57.6    2.78           54.1
##  4 Presente Si    G2           5      57.2    4.86           56.7
##  5 Presente No    G2           5      56.2    7.46           56.7
##  6 Presente Si    G1           5      55.2    7.18           54.9
##  7 Presente No    G1           5      54.5   10.2            54.9
##  8 Ausente  Si    G2           5      53.7    6.38           54.1
##  9 Ausente  Si    G1           5      51.1    2.06           54.1
## 10 Ausente  No    G2           5      50.1    5.92           48.4
## 11 Ausente  No    G3           5      49.4    6.15           48.4
## 12 Ausente  No    G1           5      45.7    6.73           48.4

# Mejor combinación (según media observada)
mejor_obs <- medias %>% slice(1)
peor_obs  <- medias %>% slice(n())

# Mejor/peor según predicción del árbol
mejor_tree <- medias %>% arrange(desc(mean_pred_tree)) %>% slice(1)
peor_tree  <- medias %>% arrange(mean_pred_tree) %>% slice(1)

# Mostrar resultados resumidos
list(
  mejor_observado = mejor_obs,
  peor_observado = peor_obs,
  mejor_seg_un_arbol = mejor_tree,
  peor_seg_un_arbol = peor_tree
)

## $mejor_observado
## # A tibble: 1 × 7
##   Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Presente No    G3           5      62.3    5.35           61.6
## 
## $peor_observado
## # A tibble: 1 × 7
##   Riego   Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>   <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Ausente No    G1           5      45.7    6.73           48.4
## 
## $mejor_seg_un_arbol
## # A tibble: 1 × 7
##   Riego    Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>    <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Presente No    G3           5      62.3    5.35           61.6
## 
## $peor_seg_un_arbol
## # A tibble: 1 × 7
##   Riego   Fert  Genotipo     n mean_Rend sd_Rend mean_pred_tree
##   <fct>   <fct> <fct>    <int>     <dbl>   <dbl>          <dbl>
## 1 Ausente No    G2           5      50.1    5.92           48.4

# Crear etiqueta única por combinación
medias$Tratamiento <- with(medias,
                           paste(Riego, Fert, Genotipo, sep = "_"))

# Identificar mejor y peor
best  <- medias$Tratamiento[which.max(medias$mean_Rend)]
worst <- medias$Tratamiento[which.min(medias$mean_Rend)]

# Nueva columna para colorear
medias$Categoria <- ifelse(medias$Tratamiento == best, "Mejor",
                           ifelse(medias$Tratamento == worst, "Peor", "Intermedio"))

## Warning: Unknown or uninitialised column: `Tratamento`.

# Gráfico bonito
library(ggplot2)

ggplot(medias, aes(x = reorder(Tratamiento, mean_Rend),
                   y = mean_Rend,
                   fill = Categoria)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("Mejor" = "darkgreen",
                               "Peor" = "red",
                               "Intermedio" = "gray70")) +
  labs(title = "Rendimiento Promedio por Combinación de Tratamientos",
       x = "Tratamiento (Riego_Fert_Genotipo)",
       y = "Rendimiento promedio") +
  theme_minimal(base_size = 13)

Al generar diferentes semillas en la simulacion observamos que el arbol CART mantiene casi las mismas combinaciones como optimas o malas, esto puede indicar diferencias entre los tratamientos producen efectos fuertes y estbales que superan la variabilidad aleatoria incluida en la simulacion, se podria tambien modificar la estructura del modelo ya que la semilla solo modifica los valores simulados mas no esta estructura

CART

María Sánchez

2025-11-14