Cargar base de datos

Imputacion de datos faltantes (Los que no fueron medidos del primer par de foliolos)

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
prediccion <- filter(datos, is.na(A_1F) )
prediccion
## # A tibble: 62 × 15
##    EST       DDE     M FEC                    PL    NH    AF Mean   Min   Max  
##    <chr>   <dbl> <dbl> <dttm>              <dbl> <dbl> <dbl> <chr>  <chr> <chr>
##  1 Vanessa    22     1 2023-10-25 00:00:00     4    29 13.9  84827  16    215  
##  2 Vanessa    22     1 2023-10-25 00:00:00     4    32 13.9  84827  16    215  
##  3 Vanessa    22     1 2023-10-25 00:00:00     4    40 20.3  82035  14    171  
##  4 Vanessa    22     1 2023-10-25 00:00:00     5     3 15.4  109384 15    216  
##  5 Vanessa    22     1 2023-10-25 00:00:00     5    17 10.6  97743  11    213  
##  6 Vanessa    22     1 2023-10-25 00:00:00     7     1  7.32 84448  7     169  
##  7 Vanessa    22     1 2023-10-25 00:00:00     7     2  8.97 90203  11    185  
##  8 Vanessa    22     1 2023-10-25 00:00:00     7     4  7.13 98006  16    180  
##  9 Vanessa    22     1 2023-10-25 00:00:00     7     5  7.13 98006  16    180  
## 10 Vanessa    22     1 2023-10-25 00:00:00     7     7 15.6  93622  16    179  
## # ℹ 52 more rows
## # ℹ 5 more variables: L_H <dbl>, A_1F <dbl>, A_2F <dbl>, L_FT <dbl>, A_FT <dbl>
entrenamiento <- filter(datos, !is.na(A_1F) )
entrenamiento
## # A tibble: 3,574 × 15
##    EST       DDE     M FEC                    PL    NH    AF Mean    Min   Max  
##    <chr>   <dbl> <dbl> <dttm>              <dbl> <dbl> <dbl> <chr>   <chr> <chr>
##  1 Vanessa    22     1 2023-10-25 00:00:00     1     1  12.7 64,767… 9.00  129.…
##  2 Vanessa    22     1 2023-10-25 00:00:00     1     2  88.4 80853   2     194  
##  3 Vanessa    22     1 2023-10-25 00:00:00     1     3  45.7 76323   2     162  
##  4 Vanessa    22     1 2023-10-25 00:00:00     1     4  74.3 90186   2     194  
##  5 Vanessa    22     1 2023-10-25 00:00:00     1     5  79.0 93819   2     191  
##  6 Vanessa    22     1 2023-10-25 00:00:00     1     6  94.9 67920   1     166  
##  7 Vanessa    22     1 2023-10-25 00:00:00     1     7  21.2 81335   2     178  
##  8 Vanessa    22     1 2023-10-25 00:00:00     1     8  44.0 83142   3     185  
##  9 Vanessa    22     1 2023-10-25 00:00:00     1     9  67.6 92057   9     204  
## 10 Vanessa    22     1 2023-10-25 00:00:00     1    10  68.4 91681   9     204  
## # ℹ 3,564 more rows
## # ℹ 5 more variables: L_H <dbl>, A_1F <dbl>, A_2F <dbl>, L_FT <dbl>, A_FT <dbl>
#
modelo1 <- lm(A_1F~AF, entrenamiento)
summary(modelo1)
## 
## Call:
## lm(formula = A_1F ~ AF, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3889 -0.8112  0.1957  1.0289  6.9064 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.2460617  0.0447367   94.91   <2e-16 ***
## AF          0.0667492  0.0006609  100.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.634 on 3572 degrees of freedom
## Multiple R-squared:  0.7406, Adjusted R-squared:  0.7406 
## F-statistic: 1.02e+04 on 1 and 3572 DF,  p-value: < 2.2e-16
#
modelo2 <- lm(A_1F~L_H, entrenamiento)
summary(modelo2)
## 
## Call:
## lm(formula = A_1F ~ L_H, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2168 -0.8850  0.2261  1.0899  6.1185 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.803081   0.080214   10.01   <2e-16 ***
## L_H         0.526351   0.005612   93.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.724 on 3572 degrees of freedom
## Multiple R-squared:  0.7112, Adjusted R-squared:  0.7111 
## F-statistic:  8796 on 1 and 3572 DF,  p-value: < 2.2e-16
#
modelo3 <- lm(A_1F~L_FT, entrenamiento)
summary(modelo3)
## 
## Call:
## lm(formula = A_1F ~ L_FT, data = entrenamiento)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.4612  -1.0227   0.1418   1.2171   8.7119 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.37097    0.14890  -15.92   <2e-16 ***
## L_FT         1.52970    0.02173   70.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.076 on 3572 degrees of freedom
## Multiple R-squared:  0.5812, Adjusted R-squared:  0.5811 
## F-statistic:  4957 on 1 and 3572 DF,  p-value: < 2.2e-16
#
modelo4 <- lm(A_1F~A_FT, entrenamiento)
summary(modelo4)
## 
## Call:
## lm(formula = A_1F ~ A_FT, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.133  -1.167   0.314   1.446   9.194 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.81705    0.14687  -5.563 2.85e-08 ***
## A_FT         1.96906    0.03236  60.858  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.247 on 3572 degrees of freedom
## Multiple R-squared:  0.509,  Adjusted R-squared:  0.5089 
## F-statistic:  3704 on 1 and 3572 DF,  p-value: < 2.2e-16
#
modelo5 <- lm(A_1F~ AF + L_H + A_FT, entrenamiento)
summary(modelo5)
## 
## Call:
## lm(formula = A_1F ~ AF + L_H + A_FT, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1736 -0.7769  0.2309  1.0072  6.3493 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.61767    0.14259  18.358  < 2e-16 ***
## AF           0.04417    0.00186  23.747  < 2e-16 ***
## L_H          0.16837    0.01605  10.492  < 2e-16 ***
## A_FT         0.13507    0.03967   3.405 0.000669 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.596 on 3570 degrees of freedom
## Multiple R-squared:  0.7525, Adjusted R-squared:  0.7523 
## F-statistic:  3619 on 3 and 3570 DF,  p-value: < 2.2e-16
#
modelo6 <- lm(A_1F~ AF * L_H , entrenamiento)
summary(modelo6)
## 
## Call:
## lm(formula = A_1F ~ AF * L_H, data = entrenamiento)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0456  -0.6500   0.1877   0.8093   7.0864 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.5342141  0.1049271  14.622  < 2e-16 ***
## AF           0.1601660  0.0035473  45.152  < 2e-16 ***
## L_H          0.0879052  0.0130471   6.738 1.87e-11 ***
## AF:L_H      -0.0037939  0.0001044 -36.357  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.366 on 3570 degrees of freedom
## Multiple R-squared:  0.8188, Adjusted R-squared:  0.8187 
## F-statistic:  5378 on 3 and 3570 DF,  p-value: < 2.2e-16
#
plot(modelo6)

#
prediccion$A_1F <- round( predict(modelo6, newdata = prediccion), digits = 3)
str(prediccion)
## tibble [62 × 15] (S3: tbl_df/tbl/data.frame)
##  $ EST : chr [1:62] "Vanessa" "Vanessa" "Vanessa" "Vanessa" ...
##  $ DDE : num [1:62] 22 22 22 22 22 22 22 22 22 22 ...
##  $ M   : num [1:62] 1 1 1 1 1 1 1 1 1 1 ...
##  $ FEC : POSIXct[1:62], format: "2023-10-25" "2023-10-25" ...
##  $ PL  : num [1:62] 4 4 4 5 5 7 7 7 7 7 ...
##  $ NH  : num [1:62] 29 32 40 3 17 1 2 4 5 7 ...
##  $ AF  : num [1:62] 13.9 13.9 20.3 15.4 10.6 ...
##  $ Mean: chr [1:62] "84827" "84827" "82035" "109384" ...
##  $ Min : chr [1:62] "16" "16" "14" "15" ...
##  $ Max : chr [1:62] "215" "215" "171" "216" ...
##  $ L_H : num [1:62] 9.34 9.34 9.66 7.71 6.15 ...
##  $ A_1F: Named num [1:62] 4.08 4.08 4.89 4.22 3.52 ...
##   ..- attr(*, "names")= chr [1:62] "1" "2" "3" "4" ...
##  $ A_2F: num [1:62] NA NA NA NA NA NA NA NA NA NA ...
##  $ L_FT: num [1:62] 3.94 3.94 5.32 6.22 3.54 ...
##  $ A_FT: num [1:62] 4.08 4.08 4.6 3.12 3.42 ...
#
BD <- rbind(entrenamiento, prediccion)

Base de datos

library(dplyr)
BD1 <- BD %>%
  mutate(., NH=as.numeric(NH), AF=as.numeric(AF), L_H=as.numeric(L_H),
         A_1F=as.numeric(A_1F), A_2F=as.numeric(A_2F),
         L_FT=as.numeric(L_FT), A_FT=as.numeric(A_FT),) %>% 
  mutate(TIPO = case_when(
    is.na(A_2F) ~ "Joven",
    !is.na(L_H) ~ "Madura"
  )) %>%
  mutate(
    EST = as.factor(EST),
    M = as.factor(M),
    PL = as.factor(PL),
    TIPO = factor(TIPO, levels = c("Madura","Joven") )
  ) %>% 
  filter(., L_H>6 | L_H<34  ) %>% 
  select(., -FEC, -Mean, -Min, -Max)
table(BD1$TIPO)
## 
## Madura  Joven 
##   3150    486
str(BD1)
## tibble [3,636 × 12] (S3: tbl_df/tbl/data.frame)
##  $ EST : Factor w/ 2 levels "Karoll","Vanessa": 2 2 2 2 2 2 2 2 2 2 ...
##  $ DDE : num [1:3636] 22 22 22 22 22 22 22 22 22 22 ...
##  $ M   : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ PL  : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ NH  : num [1:3636] 1 2 3 4 5 6 7 8 9 10 ...
##  $ AF  : num [1:3636] 12.7 88.4 45.7 74.3 79 ...
##  $ L_H : num [1:3636] 6.96 21.68 12.57 22.09 3.04 ...
##  $ A_1F: num [1:3636] 1.63 10.16 7 9.69 1.49 ...
##  $ A_2F: num [1:3636] NA 7.62 6.63 7.42 NA ...
##  $ L_FT: num [1:3636] 4.22 7.27 6.93 6.42 2.32 ...
##  $ A_FT: num [1:3636] 3.4 4.98 3.8 4.93 2.17 ...
##  $ TIPO: Factor w/ 2 levels "Madura","Joven": 2 1 1 1 2 1 1 1 1 1 ...

Analisis exploratorio

Longitud total (cm)

library(ggplot2)
# Longitud total
ggplot(BD1, aes(x=L_H, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Longitud (cm)" )+
    #geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=L_H, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  labs(x="Tipo de hoja",
       y="Longitud (cm)" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
BD1 %>% 
  summarise(.,
            media=mean(L_H),
            mediana=median(L_H),
            desv=sd(L_H),
            max=max(L_H),
            min=min(L_H)
            )
## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  13.3    12.0  5.14  33.9  3.04
#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(L_H),
            mediana=median(L_H),
            desv=sd(L_H),
            )
## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura 13.8    12.6   5.20
## 2 Joven   9.71    9.07  2.80

Ancho total 1F (cm)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=A_1F, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Ancho (cm)" )+
    #geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=A_1F, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  labs(x="Tipo de hoja",
       y="Ancho (cm)" )+
  xlim(c("Madura", "Joven"))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
BD1 %>% 
  summarise(.,
            media=mean(A_1F),
            mediana=median(A_1F),
            desv=sd(A_1F),
            max=max(A_1F),
            min=min(A_1F)
            )
## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  7.77    7.33  3.21  21.3  1.06
#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(A_1F),
            mediana=median(A_1F),
            desv=sd(A_1F),
            )
## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  8.27    7.79  3.05
## 2 Joven   4.50    4.22  2.11

Ancho total 2F (cm)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=A_2F, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Ancho (cm)" )+
    #geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 486 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=A_2F, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
    labs(x="Tipo de hoja",
       y="Ancho (cm)" )+
  xlim(c("Madura"))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).

#
BD1 %>% 
  summarise(.,
            media=mean(A_2F),
            mediana=median(A_2F),
            desv=sd(A_2F),
            max=max(A_2F),
            min=min(A_2F)
            )
## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1    NA      NA    NA    NA    NA
#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(A_2F),
            mediana=median(A_2F),
            desv=sd(A_2F),
            )
## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  5.67    4.84  3.57
## 2 Joven  NA      NA    NA

Largo del foliolo terminal

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=L_FT, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  theme_bw()+
  labs(y="Conteo",
       x="Largo del foliolo terminal (cm)" )+
  theme(legend.position="top")

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=L_FT, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  theme_bw()+
  labs(x="Tipo de hoja",
       y="Largo del foliolo terminal (cm)" )+
  theme(legend.position="top")

#
BD1 %>% 
  summarise(.,
            media=mean(L_FT),
            mediana=median(L_FT),
            desv=sd(L_FT),
            max=max(L_FT),
            min=min(L_FT)
            )
## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  6.65    6.45  1.60  13.8  1.96
#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(L_FT),
            mediana=median(L_FT),
            desv=sd(L_FT),
            )
## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  6.77    6.59  1.61
## 2 Joven   5.85    5.64  1.33

Ancho del foliolo terminal (cm)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=A_FT, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Ancho del foliolo terminal (cm)" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=A_FT, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  labs(x="Tipo de hoja",
       y="Ancho del foliolo terminal (cm)" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
BD1 %>% 
  summarise(.,
            media=mean(A_FT),
            mediana=median(A_FT),
            desv=sd(A_FT),
            max=max(A_FT),
            min=min(A_FT)
            )
## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  4.38    4.25  1.16  10.0  1.76
#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(A_FT),
            mediana=median(A_FT),
            desv=sd(A_FT),
            )
## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  4.43    4.31 1.18 
## 2 Joven   4.08    3.98 0.975

Area foliar (mm2)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=AF  , fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  theme_bw()+
  labs(y="Conteo",
       x="Promedio de area foliar por hoja ("~"cm"^2~")" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=AF  , fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  theme_bw()+
  labs(x="Tipo de hoja",
       y="Promedio de area foliar por hoja ("~"cm"^2~")" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

BD1 %>% 
  summarise(.,
            media=mean(AF),
            mediana=median(AF),
            desv=sd(AF),
            max=max(AF),
            min=min(AF)
            )
## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  53.0    39.5  41.3  357.  7.13
#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(AF),
            mediana=median(AF),
            desv=sd(AF),
            )
## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  57.3    44.3  42.4
## 2 Joven   25.2    21.8  13.0

Como cambia el AF y el NH con el tiempo

library(dplyr)
###
AF_T <- BD1 %>% 
  group_by(DDE, PL) %>% 
  summarise(S_AF=sum(AF)) %>% 
  ungroup()
## `summarise()` has grouped output by 'DDE'. You can override using the `.groups`
## argument.
ggplot(AF_T, aes(x=factor(DDE), y=S_AF))+
  geom_boxplot()+
  labs(x="Dias despues de emergencia",
       y="Area foliar por planta ("~"cm"^2~")" )+
  ylim(0, 12000)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

Como cambia el NH con el tiempo

#
NH_T <- BD1 %>% 
  group_by(TIPO, DDE) %>% 
  summarise(S_NH= ( n()/7) ) %>% 
  ungroup()
## `summarise()` has grouped output by 'TIPO'. You can override using the
## `.groups` argument.
ggplot(NH_T, aes(x=factor(DDE), y=S_NH, fill=TIPO ))+
  geom_bar(position="dodge",colour="black", stat = "identity")+
  theme_bw()+
  labs(x="Días después de emergencia",
       y="Promedio del número de hojas por planta" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
ggplot(NH_T, aes(x=factor(DDE), y=S_NH, fill=TIPO ))+
  geom_bar(position="identity",colour="black", stat = "identity")+
  theme_bw()+
  labs(x="Días después de emergencia",
       y="Promedio del número de hojas por planta" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork) 
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.4.3
library(readxl)
#
DolarColor <- "#69b3a2" #Color para dolar
EurColor <- "#CD6155" # Color para Euro
#
NH_T2 <- NH_T %>%  pivot_wider(., names_from = TIPO, values_from = S_NH)
#
library(dplyr)
#
AF_NH <- AF_T %>%
  group_by(., DDE) %>%
  summarise(., AF=mean(S_AF)) %>%
  inner_join(NH_T, by="DDE")
AF_NH
## # A tibble: 14 × 4
##      DDE    AF TIPO     S_NH
##    <dbl> <dbl> <fct>   <dbl>
##  1    22  866. Madura  17.4 
##  2    22  866. Joven    5   
##  3    31 1708. Madura  26.1 
##  4    31 1708. Joven    3.14
##  5    43 3410. Madura  46.6 
##  6    43 3410. Joven   21.1 
##  7    52 3099. Madura  46.3 
##  8    52 3099. Joven   12.6 
##  9    62 5105. Madura  82.4 
## 10    62 5105. Joven    8.43
## 11    73 7774. Madura 127.  
## 12    73 7774. Joven    9.29
## 13    82 5568. Madura 104.  
## 14    82 5568. Joven    9.86
#
coeff <- 0.02 # Coeficiente graduado para que las unidades de ambos ejes coincidan
# Por ello se utilizo para ajustar las unidades de la variable y secundaria como su eje
ggplot(AF_NH, aes(x=DDE, fill=TIPO))+
  geom_bar(aes(y=S_NH/coeff, fill=TIPO), stat = "identity", color="black")+
  geom_line(aes(y=AF), size=0.8, color="black")+
  scale_y_continuous(name="Área foliar promedio por planta ("~"cm"^2~")",
                     limits = c(0, 8000),
                     sec.axis = sec_axis(~.*coeff, name = "Número de hojas"))+
  scale_x_continuous(name="Días despues de emergencia",
                     breaks=seq(0, 90, 10))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Análisis de correlaciones

library(corrplot)
## corrplot 0.94 loaded
# Todas
cor1s <- cor(BD1[, 6:11], method = "spearman", use = "pairwise.complete.obs")
corrplot(cor1s, method="number")

##### "everything", "all.obs", "complete.obs", "na.or.complete", or "pairwise.complete.obs".
# Hojas jovenes
BD_hj <- BD1 %>% filter(., TIPO=="Joven") %>% select(., !c(A_2F) )
BD_hj
## # A tibble: 486 × 11
##    EST       DDE M     PL       NH    AF   L_H  A_1F  L_FT  A_FT TIPO 
##    <fct>   <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>
##  1 Vanessa    22 1     1         1 12.7   6.96  1.63  4.22  3.4  Joven
##  2 Vanessa    22 1     1         5 79.0   3.04  1.49  2.32  2.17 Joven
##  3 Vanessa    22 1     1        13 13.1   6.18  3.73  3.51  3.45 Joven
##  4 Vanessa    22 1     1        14  9.43  6     2.77  3.36  2.57 Joven
##  5 Vanessa    22 1     3         1 25.6  11.9   2.48  5.33  4.60 Joven
##  6 Vanessa    22 1     3         4 27.4  11.3   2.05  7.74  5.61 Joven
##  7 Vanessa    22 1     3         9 17.6   9.66  1.76  5.36  3.90 Joven
##  8 Vanessa    22 1     3        10 19.9   9.63  4.12  5.44  3.76 Joven
##  9 Vanessa    22 1     3        11 10.4   8.04  1.13  4.00  2.70 Joven
## 10 Vanessa    22 1     4         8 32.7  15.5   2.13  7.01  5.95 Joven
## # ℹ 476 more rows
cor_hj <- cor(BD_hj[, 6:10], method = "spearman", use="pairwise.complete.obs")
corrplot(cor_hj, method="number")

# Hojas maduras
BD_ma <- BD1 %>% filter(., TIPO=="Madura")
BD_ma
## # A tibble: 3,150 × 12
##    EST       DDE M     PL       NH    AF   L_H  A_1F  A_2F  L_FT  A_FT TIPO  
##    <fct>   <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> 
##  1 Vanessa    22 1     1         2  88.4 21.7  10.2   7.62  7.27  4.98 Madura
##  2 Vanessa    22 1     1         3  45.7 12.6   7.00  6.63  6.93  3.80 Madura
##  3 Vanessa    22 1     1         4  74.3 22.1   9.68  7.42  6.42  4.93 Madura
##  4 Vanessa    22 1     1         6  94.9 17.4   8.84 10.2   8.10  5.06 Madura
##  5 Vanessa    22 1     1         7  21.2  6.94  4.44  5.65  2.94  1.95 Madura
##  6 Vanessa    22 1     1         8  44.0 11.8   6.77  5.07  4.88  4.06 Madura
##  7 Vanessa    22 1     1         9  67.6 14.3   9.89  8.53  7.5   4.29 Madura
##  8 Vanessa    22 1     1        10  68.4 14.4  10.2   8.65  7.35  4.23 Madura
##  9 Vanessa    22 1     1        11  97.9 18.9  10.5  11.8   5.83  5.43 Madura
## 10 Vanessa    22 1     1        12  60.0 11.7   9.18  9     5.14  4.07 Madura
## # ℹ 3,140 more rows
cor_ma <- cor(BD_ma[, 6:11], method = "spearman", use = "pairwise.complete.obs")
corrplot(cor_ma, method="number")

# Division de BD

library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Cargando paquete requerido: lattice
## 
## Adjuntando el paquete: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(dplyr)
set.seed(123) # Para reproducibilidad

# Dividir el conjunto de datos en grupos de entrenamiento y validación
## Hojas jovenes
indices_entrenamiento_hj <- createDataPartition(BD_hj$AF, p = 0.8, list = FALSE)
entren_hj <- BD_hj[indices_entrenamiento_hj, ]
valid_hj <- BD_hj[-indices_entrenamiento_hj, ]

## Hojas maduras
indices_entrenamiento_ma <- createDataPartition(BD_ma$AF, p = 0.8, list = FALSE)
entren_ma <- BD_ma[indices_entrenamiento_ma, ]
valid_ma <- BD_ma[-indices_entrenamiento_ma, ]

Planteamiento de modelos

1. AF vs L_H

library(ggpubr)
## Hojas jovenes
R.L_H.hj <- lm(formula = AF~L_H, data = entren_hj)
summary(R.L_H.hj)
## 
## Call:
## lm(formula = AF ~ L_H, data = entren_hj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.021  -2.980  -0.218   2.340  38.966 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -15.0833     1.0214  -14.77   <2e-16 ***
## L_H           4.1342     0.1011   40.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.624 on 388 degrees of freedom
## Multiple R-squared:  0.8116, Adjusted R-squared:  0.8111 
## F-statistic:  1672 on 1 and 388 DF,  p-value: < 2.2e-16
## Hojas maduras
R.L_H.ma <- lm(formula = AF~L_H, data = entren_ma)
summary(R.L_H.ma)
## 
## Call:
## lm(formula = AF ~ L_H, data = entren_ma)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.578  -7.242   0.730   7.070 147.197 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -47.91433    0.82602  -58.01   <2e-16 ***
## L_H           7.61510    0.05593  136.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.61 on 2520 degrees of freedom
## Multiple R-squared:  0.8803, Adjusted R-squared:  0.8803 
## F-statistic: 1.854e+04 on 1 and 2520 DF,  p-value: < 2.2e-16

Supuestos del modelo

# Hojas jovenes
# 1. Test de linealidad
cor.test(entren_hj$AF, entren_hj$L_H)
## 
##  Pearson's product-moment correlation
## 
## data:  entren_hj$AF and entren_hj$L_H
## t = 40.888, df = 388, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8803632 0.9180757
## sample estimates:
##       cor 
## 0.9009064
# 2. Test de heterocedasticidad
plot(fitted(R.L_H.hj), resid(R.L_H.hj), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

# Podemos ver en la gr?fica que los residuos exhiben una forma de "cono" 
## no est?n distribuidos con la misma varianza en toda la gr?fica.
plot(R.L_H.hj, 3) # Revisar la horizontalidad de la linea roja

# perform Breusch-Pagan test
library(lmtest)
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
bptest(R.L_H.hj)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.L_H.hj
## BP = 33.2, df = 1, p-value = 8.317e-09
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Cargando paquete requerido: carData
## 
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:dplyr':
## 
##     recode
ncvTest(R.L_H.hj) # Since the p-value from the test is <0.05 we will reject the null hypothesis
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 205.5325, Df = 1, p = < 2.22e-16
# and conclude that heteroscedasticity is a problem in this model
# 3. Normalidad de residuos
plot(R.L_H.hj, 2) # segun la imagen si siguen una distribucion

hist(R.L_H.hj$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.L_H.hj$residuals) # No siguen una distribucion normal
## 
##  Shapiro-Wilk normality test
## 
## data:  R.L_H.hj$residuals
## W = 0.86611, p-value < 2.2e-16
#LillieTest(R.L_H.hj$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.L_H.hj) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.L_H.hj, 4) # Cuales casos tienen mayor distancia de cook

plot(R.L_H.hj, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_hj$cook.L_H <- cooks.distance(R.L_H.hj)
which(entren_hj$cook.L_H>1) # no se detectan valores influyentes para la prueba de cook
## named integer(0)
# Hojas maduras
# 1. Test de linealidad
cor.test(entren_ma$AF, entren_ma$L_H)
## 
##  Pearson's product-moment correlation
## 
## data:  entren_ma$AF and entren_ma$L_H
## t = 136.15, df = 2520, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9334086 0.9427632
## sample estimates:
##       cor 
## 0.9382572
# 2. Test de heterocedasticidad
plot(fitted(R.L_H.ma), resid(R.L_H.ma), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

plot(R.L_H.ma, 3) # Revisar la horizontalidad de la linea roja

library(lmtest); bptest(R.L_H.ma)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.L_H.ma
## BP = 422.11, df = 1, p-value < 2.2e-16
library(car); ncvTest(R.L_H.ma) 
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 2385.598, Df = 1, p = < 2.22e-16
# 3. Normalidad de residuos
plot(R.L_H.ma, 2) # segun la imagen si siguen una distribucion

hist(R.L_H.ma$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.L_H.ma$residuals) # No siguen una distribucion normal
## 
##  Shapiro-Wilk normality test
## 
## data:  R.L_H.ma$residuals
## W = 0.90322, p-value < 2.2e-16
#LillieTest(R.L_H.ma$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.L_H.ma) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.L_H.ma, 4) # Cuales casos tienen mayor distancia de cook

plot(R.L_H.ma, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_ma$cook.L_H <- cooks.distance(R.L_H.ma)
which(entren_ma$cook.L_H>1) # no se detectan valores influyentes para la prueba de cook
## named integer(0)

Ajuste por MCP

# Hojas jovenes
w.L_H.hj <- 1 / lm(abs(R.L_H.hj$residuals) ~ R.L_H.hj$fitted.values)$fitted.values^2
R.MCP.L_H.hj <- lm(AF ~ L_H, data = entren_hj, weights=w.L_H.hj)
summary(R.MCP.L_H.hj)
## 
## Call:
## lm(formula = AF ~ L_H, data = entren_hj, weights = w.L_H.hj)
## 
## Weighted Residuals:
##    Min     1Q Median     3Q    Max 
## -4.853 -1.065 -0.181  0.725 48.151 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -4.9439     0.6334  -7.805 5.59e-14 ***
## L_H           3.0027     0.1142  26.289  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.903 on 388 degrees of freedom
## Multiple R-squared:  0.6405, Adjusted R-squared:  0.6395 
## F-statistic: 691.1 on 1 and 388 DF,  p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.L_H.hj)
## 
## Adjuntando el paquete: 'DescTools'
## The following object is masked from 'package:car':
## 
##     Recode
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
plot(R.MCP.L_H.hj, 3)

library(lmtest); bptest(R.MCP.L_H.hj)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.L_H.hj
## BP = 0.7767, df = 1, p-value = 0.3782
library(car); ncvTest(R.MCP.L_H.hj)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 349.75, Df = 1, p = < 2.22e-16
hist(R.MCP.L_H.hj$residuals)

plot(R.MCP.L_H.hj, 2)

shapiro.test(R.MCP.L_H.hj$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.L_H.hj$residuals
## W = 0.78863, p-value < 2.2e-16
#LillieTest(R.MCP.L_H.hj$residuals)
plot(R.MCP.L_H.hj, 4)

# Hojas maduras
w.L_H.ma <- 1 / lm(abs(R.L_H.ma$residuals) ~ R.L_H.ma$fitted.values)$fitted.values^2
R.MCP.L_H.ma <- lm(AF ~ L_H, data = entren_ma, weights=w.L_H.ma)
summary(R.MCP.L_H.ma)
## 
## Call:
## lm(formula = AF ~ L_H, data = entren_ma, weights = w.L_H.ma)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6038 -0.7432 -0.0932  0.6110 18.7334 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -25.08530    0.42918  -58.45   <2e-16 ***
## L_H           5.76220    0.04503  127.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.202 on 2520 degrees of freedom
## Multiple R-squared:  0.8666, Adjusted R-squared:  0.8666 
## F-statistic: 1.637e+04 on 1 and 2520 DF,  p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.L_H.ma)
plot(R.MCP.L_H.ma, 3)

library(lmtest); bptest(R.MCP.L_H.ma)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.L_H.ma
## BP = 0.55299, df = 1, p-value = 0.4571
library(car); ncvTest(R.MCP.L_H.ma)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 166.9895, Df = 1, p = < 2.22e-16
hist(R.MCP.L_H.ma$residuals)

plot(R.MCP.L_H.ma, 2)

shapiro.test(R.MCP.L_H.ma$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.L_H.ma$residuals
## W = 0.75092, p-value < 2.2e-16
#LillieTest(R.MCP.L_H.ma$residuals)
plot(R.MCP.L_H.ma, 4)

### Predicciones sobre el modelo

# Hojas jovenes
predic_hj <- predict(R.L_H.hj, newdata = valid_hj)
# Hojas maduras
predic_ma <- predict(R.L_H.ma, newdata = valid_ma)

Métricas de evaluación

# Hojas jovenes
rmse_hj <- sqrt(mean((valid_hj$AF - predic_hj)^2)) # Raíz del error cuadrático medio
rmse_hj
## [1] 9.525282
mae_hj <- mean(abs(valid_hj$AF - predic_hj)) # Error absoluto medio
mae_hj
## [1] 4.076445
r2_hj <- cor(valid_hj$AF, predic_hj)^2 # Coeficiente de determinación (R cuadrado)
r2_hj
## [1] 0.513131
# Hojas maduras
rmse_ma <- sqrt(mean((valid_ma$AF - predic_ma)^2)) # Raíz del error cuadrático medio
rmse_ma
## [1] 15.02574
mae_ma <- mean(abs(valid_ma$AF - predic_ma)) # Error absoluto medio
mae_ma
## [1] 9.970014
r2_ma <- cor(valid_ma$AF, predic_ma)^2 # Coeficiente de determinación (R cuadrado)
r2_ma
## [1] 0.8799063

2. AF vs A_1F

library(ggpubr)
## Hojas jovenes
R.A_1F.hj <- lm(formula = AF~A_1F, data = entren_hj)
summary(R.A_1F.hj)
## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_hj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.136  -7.152  -2.198   4.676  64.603 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.1620     1.2511   7.323 1.41e-12 ***
## A_1F          3.5094     0.2502  14.024  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.56 on 388 degrees of freedom
## Multiple R-squared:  0.3364, Adjusted R-squared:  0.3347 
## F-statistic: 196.7 on 1 and 388 DF,  p-value: < 2.2e-16
summary(R.A_1F.hj)$sigma
## [1] 10.55638
## Hojas maduras
R.A_1F.ma <- lm(formula = AF~A_1F, data = entren_ma)
summary(R.A_1F.ma)
## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_ma)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.103 -11.724  -4.026   6.784 142.733 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -42.6878     1.1547  -36.97   <2e-16 ***
## A_1F         12.0692     0.1306   92.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.16 on 2520 degrees of freedom
## Multiple R-squared:  0.7721, Adjusted R-squared:  0.772 
## F-statistic:  8537 on 1 and 2520 DF,  p-value: < 2.2e-16
summary(R.A_1F.ma)$sigma
## [1] 20.16261

Supuestos del modelo

# Hojas jovenes
# 1. Test de linealidad
cor.test(entren_hj$AF, entren_hj$A_1F)
## 
##  Pearson's product-moment correlation
## 
## data:  entren_hj$AF and entren_hj$A_1F
## t = 14.024, df = 388, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5100440 0.6422831
## sample estimates:
##       cor 
## 0.5799715
# 2. Test de heterocedasticidad
plot(fitted(R.A_1F.hj), resid(R.A_1F.hj), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

# Podemos ver en la gr?fica que los residuos exhiben una forma de "cono" 
## no est?n distribuidos con la misma varianza en toda la gr?fica.
plot(R.A_1F.hj, 3) # Revisar la horizontalidad de la linea roja

# perform Breusch-Pagan test
library(lmtest)
bptest(R.A_1F.hj)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.A_1F.hj
## BP = 20.429, df = 1, p-value = 6.189e-06
library(car)
ncvTest(R.A_1F.hj) # Since the p-value from the test is <0.05 we will reject the null hypothesis
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 70.93415, Df = 1, p = < 2.22e-16
# and conclude that heteroscedasticity is a problem in this model
# 3. Normalidad de residuos
plot(R.A_1F.hj, 2) # segun la imagen si siguen una distribucion

hist(R.A_1F.hj$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.A_1F.hj$residuals) # No siguen una distribucion normal
## 
##  Shapiro-Wilk normality test
## 
## data:  R.A_1F.hj$residuals
## W = 0.89298, p-value = 6.932e-16
#LillieTest(R.A_1F.hj$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.A_1F.hj) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.A_1F.hj, 4) # Cuales casos tienen mayor distancia de cook

plot(R.A_1F.hj, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_hj$cook.A_1F <- cooks.distance(R.A_1F.hj)
which(entren_hj$cook.A_1F>1) # no se detectan valores influyentes para la prueba de cook
## named integer(0)
# Hojas maduras
# 1. Test de linealidad
cor.test(entren_ma$AF, entren_ma$A_1F)
## 
##  Pearson's product-moment correlation
## 
## data:  entren_ma$AF and entren_ma$A_1F
## t = 92.398, df = 2520, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8694810 0.8872924
## sample estimates:
##       cor 
## 0.8786921
# 2. Test de heterocedasticidad
plot(fitted(R.A_1F.ma), resid(R.A_1F.ma), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

plot(R.A_1F.ma, 3) # Revisar la horizontalidad de la linea roja

library(lmtest); bptest(R.A_1F.ma)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.A_1F.ma
## BP = 229.37, df = 1, p-value < 2.2e-16
library(car); ncvTest(R.A_1F.ma) 
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 993.4437, Df = 1, p = < 2.22e-16
# 3. Normalidad de residuos
plot(R.A_1F.ma, 2) # segun la imagen si siguen una distribucion

hist(R.A_1F.ma$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.A_1F.ma$residuals) # No siguen una distribucion normal
## 
##  Shapiro-Wilk normality test
## 
## data:  R.A_1F.ma$residuals
## W = 0.86529, p-value < 2.2e-16
#LillieTest(R.A_1F.ma$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.A_1F.ma) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.A_1F.ma, 4) # Cuales casos tienen mayor distancia de cook

plot(R.A_1F.ma, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_ma$cook.A_1F <- cooks.distance(R.A_1F.ma)
which(entren_ma$cook.A_1F>1) # no se detectan valores influyentes para la prueba de cook
## named integer(0)

Ajuste por MCP

# Hojas jovenes
w.A_1F.hj <- 1 / lm(abs(R.A_1F.hj$residuals) ~ R.A_1F.hj$fitted.values)$fitted.values^2
R.MCP.A_1F.hj <- lm(AF ~ A_1F, data = entren_hj, weights=w.A_1F.hj)
summary(R.MCP.A_1F.hj)
## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_hj, weights = w.A_1F.hj)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0862 -0.9530 -0.3101  0.6311  6.9013 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  12.2297     1.1488   10.65   <2e-16 ***
## A_1F          2.7924     0.2789   10.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.352 on 388 degrees of freedom
## Multiple R-squared:  0.2053, Adjusted R-squared:  0.2033 
## F-statistic: 100.2 on 1 and 388 DF,  p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.A_1F.hj)
plot(R.MCP.A_1F.hj, 3)

library(lmtest); bptest(R.MCP.A_1F.hj)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.A_1F.hj
## BP = 0.65768, df = 1, p-value = 0.4174
library(car); ncvTest(R.MCP.A_1F.hj)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.7695445, Df = 1, p = 0.38036
hist(R.MCP.A_1F.hj$residuals)

plot(R.MCP.A_1F.hj, 2)

shapiro.test(R.MCP.A_1F.hj$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.A_1F.hj$residuals
## W = 0.87434, p-value < 2.2e-16
#LillieTest(R.MCP.A_1F.hj$residuals)
plot(R.MCP.A_1F.hj, 4)

# Hojas maduras
w.A_1F.ma <- 1 / lm(abs(R.A_1F.ma$residuals) ~ R.A_1F.ma$fitted.values)$fitted.values^2
R.MCP.A_1F.ma <- lm(AF ~ A_1F, data = entren_ma, weights=w.A_1F.ma)
summary(R.MCP.A_1F.ma)
## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_ma, weights = w.A_1F.ma)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1383 -0.9258 -0.3760  0.4752 18.7645 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -12.6168     0.7514  -16.79   <2e-16 ***
## A_1F          8.1310     0.1165   69.80   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.424 on 2520 degrees of freedom
## Multiple R-squared:  0.6591, Adjusted R-squared:  0.659 
## F-statistic:  4872 on 1 and 2520 DF,  p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.A_1F.ma)
plot(R.MCP.A_1F.ma, 3)

library(lmtest); bptest(R.MCP.A_1F.ma)
## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.A_1F.ma
## BP = 0.5787, df = 1, p-value = 0.4468
library(car); ncvTest(R.MCP.A_1F.ma)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 13.1759, Df = 1, p = 0.00028357
hist(R.MCP.A_1F.ma$residuals)

plot(R.MCP.A_1F.ma, 2)

shapiro.test(R.MCP.A_1F.ma$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.A_1F.ma$residuals
## W = 0.72561, p-value < 2.2e-16
#LillieTest(R.MCP.A_1F.ma$residuals)
plot(R.MCP.A_1F.ma, 4)

### Predicciones sobre el modelo

# Hojas jovenes
predic_hj <- predict(R.A_1F.hj, newdata = valid_hj)
# Hojas maduras
predic_ma <- predict(R.A_1F.ma, newdata = valid_ma)

Métricas de evaluación

# Hojas jovenes
rmse_hj <- sqrt(mean((valid_hj$AF - predic_hj)^2)) # Raíz del error cuadrático medio
rmse_hj
## [1] 11.77434
mae_hj <- mean(abs(valid_hj$AF - predic_hj)) # Error absoluto medio
mae_hj
## [1] 8.307776
r2_hj <- cor(valid_hj$AF, predic_hj)^2 # Coeficiente de determinación (R cuadrado)
r2_hj
## [1] 0.2367429
# Hojas maduras
rmse_ma <- sqrt(mean((valid_ma$AF - predic_ma)^2)) # Raíz del error cuadrático medio
rmse_ma
## [1] 22.00266
mae_ma <- mean(abs(valid_ma$AF - predic_ma)) # Error absoluto medio
mae_ma
## [1] 14.35855
r2_ma <- cor(valid_ma$AF, predic_ma)^2 # Coeficiente de determinación (R cuadrado)
r2_ma
## [1] 0.7428278

3. AF vs LH + A_1F

## Hojas jovenes
R.LH.A_1F.hj <- lm(formula = AF ~ L_H + A_1F, data = entren_hj)
summary(R.LH.A_1F.hj)
## 
## Call:
## lm(formula = AF ~ L_H + A_1F, data = entren_hj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.729  -2.439  -0.100   2.128  37.321 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -16.1619     0.9568 -16.892  < 2e-16 ***
## L_H           3.7178     0.1072  34.676  < 2e-16 ***
## A_1F          1.1323     0.1414   8.009 1.37e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.216 on 387 degrees of freedom
## Multiple R-squared:  0.8384, Adjusted R-squared:  0.8376 
## F-statistic:  1004 on 2 and 387 DF,  p-value: < 2.2e-16
summary(R.LH.A_1F.hj)$sigma
## [1] 5.215678
## Hojas maduras
R.LH.A_1F.ma <- lm(formula = AF ~ L_H + A_1F, data = entren_ma)
summary(R.LH.A_1F.ma)
## 
## Call:
## lm(formula = AF ~ L_H + A_1F, data = entren_ma)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.755  -7.184  -0.363   6.002 135.861 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -53.0325     0.7773  -68.23   <2e-16 ***
## L_H           5.6387     0.0974   57.89   <2e-16 ***
## A_1F          3.9137     0.1648   23.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.21 on 2519 degrees of freedom
## Multiple R-squared:  0.9022, Adjusted R-squared:  0.9021 
## F-statistic: 1.162e+04 on 2 and 2519 DF,  p-value: < 2.2e-16
summary(R.LH.A_1F.ma)$sigma
## [1] 13.20998

4. AF vs LH Pendientes paralelas

#
R.PP.LH <- lm(formula = AF ~ L_H + TIPO + 0, data = BD1)
summary(R.PP.LH)
## 
## Call:
## lm(formula = AF ~ L_H + TIPO + 0, data = BD1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.732  -7.080   0.715   6.603 149.437 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## L_H          7.49916    0.04814  155.79   <2e-16 ***
## TIPOMadura -46.22588    0.71205  -64.92   <2e-16 ***
## TIPOJoven  -47.68527    0.80172  -59.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.36 on 3633 degrees of freedom
## Multiple R-squared:  0.9543, Adjusted R-squared:  0.9543 
## F-statistic: 2.531e+04 on 3 and 3633 DF,  p-value: < 2.2e-16
summary(R.PP.LH)$sigma
## [1] 14.35716
#
plot(y=BD1$AF,
     x=BD1$L_H,
     pch=19,
     col=factor(BD1$TIPO))
abline(-46.22588, 7.49916, col="black", lty=2)
abline(-47.68527, 7.49916, col="red", lty=2)

#
ggplot(BD1, aes(x = L_H, y = AF, color = factor(TIPO))) +
  geom_point(size = 2, alpha=0.2) +
  geom_abline(intercept = -46.22588, slope = 7.49916, linetype = "solid", color = "#1b9e77") +
  geom_abline(intercept = -47.68527, slope = 7.49916, linetype = "solid", color = "#d95f02") +
  labs(x = "L_H", y = "AF", color = "TIPO") +
  xlab("Longitud de la hoja (cm)")+
  ylab("Promedio de area foliar por hoja ("~"cm"^2~")")+
  xlim(c(0,32))+
  ylim(c(0,200))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_point()`).

Correlograma

library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
BD1
## # A tibble: 3,636 × 12
##    EST       DDE M     PL       NH    AF   L_H  A_1F  A_2F  L_FT  A_FT TIPO  
##    <fct>   <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> 
##  1 Vanessa    22 1     1         1  12.7  6.96  1.63 NA     4.22  3.4  Joven 
##  2 Vanessa    22 1     1         2  88.4 21.7  10.2   7.62  7.27  4.98 Madura
##  3 Vanessa    22 1     1         3  45.7 12.6   7.00  6.63  6.93  3.80 Madura
##  4 Vanessa    22 1     1         4  74.3 22.1   9.68  7.42  6.42  4.93 Madura
##  5 Vanessa    22 1     1         5  79.0  3.04  1.49 NA     2.32  2.17 Joven 
##  6 Vanessa    22 1     1         6  94.9 17.4   8.84 10.2   8.10  5.06 Madura
##  7 Vanessa    22 1     1         7  21.2  6.94  4.44  5.65  2.94  1.95 Madura
##  8 Vanessa    22 1     1         8  44.0 11.8   6.77  5.07  4.88  4.06 Madura
##  9 Vanessa    22 1     1         9  67.6 14.3   9.89  8.53  7.5   4.29 Madura
## 10 Vanessa    22 1     1        10  68.4 14.4  10.2   8.65  7.35  4.23 Madura
## # ℹ 3,626 more rows
A <- ggpairs(BD1, columns=6:11, aes(colour=TIPO, alpha=0.2),
        upper = list(continuous = wrap("cor", size = 2.5),
        lower = list(continuous = "point", size = 0.1, alpha=0.1),
        diag = list(continuous = "densityDiag"),
        xlab=NULL,
        axisLabels = "show") )
A + theme(
  panel.background = element_blank(),
  strip.background = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 486 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).