Cargar base de datos

Imputacion de datos faltantes (Los que no fueron medidos del primer par de foliolos)

library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

prediccion <- filter(datos, is.na(A_1F) )
prediccion

## # A tibble: 62 × 15
##    EST       DDE     M FEC                    PL    NH    AF Mean   Min   Max  
##    <chr>   <dbl> <dbl> <dttm>              <dbl> <dbl> <dbl> <chr>  <chr> <chr>
##  1 Vanessa    22     1 2023-10-25 00:00:00     4    29 13.9  84827  16    215  
##  2 Vanessa    22     1 2023-10-25 00:00:00     4    32 13.9  84827  16    215  
##  3 Vanessa    22     1 2023-10-25 00:00:00     4    40 20.3  82035  14    171  
##  4 Vanessa    22     1 2023-10-25 00:00:00     5     3 15.4  109384 15    216  
##  5 Vanessa    22     1 2023-10-25 00:00:00     5    17 10.6  97743  11    213  
##  6 Vanessa    22     1 2023-10-25 00:00:00     7     1  7.32 84448  7     169  
##  7 Vanessa    22     1 2023-10-25 00:00:00     7     2  8.97 90203  11    185  
##  8 Vanessa    22     1 2023-10-25 00:00:00     7     4  7.13 98006  16    180  
##  9 Vanessa    22     1 2023-10-25 00:00:00     7     5  7.13 98006  16    180  
## 10 Vanessa    22     1 2023-10-25 00:00:00     7     7 15.6  93622  16    179  
## # ℹ 52 more rows
## # ℹ 5 more variables: L_H <dbl>, A_1F <dbl>, A_2F <dbl>, L_FT <dbl>, A_FT <dbl>

entrenamiento <- filter(datos, !is.na(A_1F) )
entrenamiento

## # A tibble: 3,574 × 15
##    EST       DDE     M FEC                    PL    NH    AF Mean    Min   Max  
##    <chr>   <dbl> <dbl> <dttm>              <dbl> <dbl> <dbl> <chr>   <chr> <chr>
##  1 Vanessa    22     1 2023-10-25 00:00:00     1     1  12.7 64,767… 9.00  129.…
##  2 Vanessa    22     1 2023-10-25 00:00:00     1     2  88.4 80853   2     194  
##  3 Vanessa    22     1 2023-10-25 00:00:00     1     3  45.7 76323   2     162  
##  4 Vanessa    22     1 2023-10-25 00:00:00     1     4  74.3 90186   2     194  
##  5 Vanessa    22     1 2023-10-25 00:00:00     1     5  79.0 93819   2     191  
##  6 Vanessa    22     1 2023-10-25 00:00:00     1     6  94.9 67920   1     166  
##  7 Vanessa    22     1 2023-10-25 00:00:00     1     7  21.2 81335   2     178  
##  8 Vanessa    22     1 2023-10-25 00:00:00     1     8  44.0 83142   3     185  
##  9 Vanessa    22     1 2023-10-25 00:00:00     1     9  67.6 92057   9     204  
## 10 Vanessa    22     1 2023-10-25 00:00:00     1    10  68.4 91681   9     204  
## # ℹ 3,564 more rows
## # ℹ 5 more variables: L_H <dbl>, A_1F <dbl>, A_2F <dbl>, L_FT <dbl>, A_FT <dbl>

#
modelo1 <- lm(A_1F~AF, entrenamiento)
summary(modelo1)

## 
## Call:
## lm(formula = A_1F ~ AF, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3889 -0.8112  0.1957  1.0289  6.9064 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.2460617  0.0447367   94.91   <2e-16 ***
## AF          0.0667492  0.0006609  100.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.634 on 3572 degrees of freedom
## Multiple R-squared:  0.7406, Adjusted R-squared:  0.7406 
## F-statistic: 1.02e+04 on 1 and 3572 DF,  p-value: < 2.2e-16

#
modelo2 <- lm(A_1F~L_H, entrenamiento)
summary(modelo2)

## 
## Call:
## lm(formula = A_1F ~ L_H, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2168 -0.8850  0.2261  1.0899  6.1185 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.803081   0.080214   10.01   <2e-16 ***
## L_H         0.526351   0.005612   93.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.724 on 3572 degrees of freedom
## Multiple R-squared:  0.7112, Adjusted R-squared:  0.7111 
## F-statistic:  8796 on 1 and 3572 DF,  p-value: < 2.2e-16

#
modelo3 <- lm(A_1F~L_FT, entrenamiento)
summary(modelo3)

## 
## Call:
## lm(formula = A_1F ~ L_FT, data = entrenamiento)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.4612  -1.0227   0.1418   1.2171   8.7119 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.37097    0.14890  -15.92   <2e-16 ***
## L_FT         1.52970    0.02173   70.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.076 on 3572 degrees of freedom
## Multiple R-squared:  0.5812, Adjusted R-squared:  0.5811 
## F-statistic:  4957 on 1 and 3572 DF,  p-value: < 2.2e-16

#
modelo4 <- lm(A_1F~A_FT, entrenamiento)
summary(modelo4)

## 
## Call:
## lm(formula = A_1F ~ A_FT, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.133  -1.167   0.314   1.446   9.194 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.81705    0.14687  -5.563 2.85e-08 ***
## A_FT         1.96906    0.03236  60.858  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.247 on 3572 degrees of freedom
## Multiple R-squared:  0.509,  Adjusted R-squared:  0.5089 
## F-statistic:  3704 on 1 and 3572 DF,  p-value: < 2.2e-16

#
modelo5 <- lm(A_1F~ AF + L_H + A_FT, entrenamiento)
summary(modelo5)

## 
## Call:
## lm(formula = A_1F ~ AF + L_H + A_FT, data = entrenamiento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1736 -0.7769  0.2309  1.0072  6.3493 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.61767    0.14259  18.358  < 2e-16 ***
## AF           0.04417    0.00186  23.747  < 2e-16 ***
## L_H          0.16837    0.01605  10.492  < 2e-16 ***
## A_FT         0.13507    0.03967   3.405 0.000669 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.596 on 3570 degrees of freedom
## Multiple R-squared:  0.7525, Adjusted R-squared:  0.7523 
## F-statistic:  3619 on 3 and 3570 DF,  p-value: < 2.2e-16

#
modelo6 <- lm(A_1F~ AF * L_H , entrenamiento)
summary(modelo6)

## 
## Call:
## lm(formula = A_1F ~ AF * L_H, data = entrenamiento)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0456  -0.6500   0.1877   0.8093   7.0864 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.5342141  0.1049271  14.622  < 2e-16 ***
## AF           0.1601660  0.0035473  45.152  < 2e-16 ***
## L_H          0.0879052  0.0130471   6.738 1.87e-11 ***
## AF:L_H      -0.0037939  0.0001044 -36.357  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.366 on 3570 degrees of freedom
## Multiple R-squared:  0.8188, Adjusted R-squared:  0.8187 
## F-statistic:  5378 on 3 and 3570 DF,  p-value: < 2.2e-16

#
plot(modelo6)

#
prediccion$A_1F <- round( predict(modelo6, newdata = prediccion), digits = 3)
str(prediccion)

## tibble [62 × 15] (S3: tbl_df/tbl/data.frame)
##  $ EST : chr [1:62] "Vanessa" "Vanessa" "Vanessa" "Vanessa" ...
##  $ DDE : num [1:62] 22 22 22 22 22 22 22 22 22 22 ...
##  $ M   : num [1:62] 1 1 1 1 1 1 1 1 1 1 ...
##  $ FEC : POSIXct[1:62], format: "2023-10-25" "2023-10-25" ...
##  $ PL  : num [1:62] 4 4 4 5 5 7 7 7 7 7 ...
##  $ NH  : num [1:62] 29 32 40 3 17 1 2 4 5 7 ...
##  $ AF  : num [1:62] 13.9 13.9 20.3 15.4 10.6 ...
##  $ Mean: chr [1:62] "84827" "84827" "82035" "109384" ...
##  $ Min : chr [1:62] "16" "16" "14" "15" ...
##  $ Max : chr [1:62] "215" "215" "171" "216" ...
##  $ L_H : num [1:62] 9.34 9.34 9.66 7.71 6.15 ...
##  $ A_1F: Named num [1:62] 4.08 4.08 4.89 4.22 3.52 ...
##   ..- attr(*, "names")= chr [1:62] "1" "2" "3" "4" ...
##  $ A_2F: num [1:62] NA NA NA NA NA NA NA NA NA NA ...
##  $ L_FT: num [1:62] 3.94 3.94 5.32 6.22 3.54 ...
##  $ A_FT: num [1:62] 4.08 4.08 4.6 3.12 3.42 ...

#
BD <- rbind(entrenamiento, prediccion)

Base de datos

library(dplyr)
BD1 <- BD %>%
  mutate(., NH=as.numeric(NH), AF=as.numeric(AF), L_H=as.numeric(L_H),
         A_1F=as.numeric(A_1F), A_2F=as.numeric(A_2F),
         L_FT=as.numeric(L_FT), A_FT=as.numeric(A_FT),) %>% 
  mutate(TIPO = case_when(
    is.na(A_2F) ~ "Joven",
    !is.na(L_H) ~ "Madura"
  )) %>%
  mutate(
    EST = as.factor(EST),
    M = as.factor(M),
    PL = as.factor(PL),
    TIPO = factor(TIPO, levels = c("Madura","Joven") )
  ) %>% 
  filter(., L_H>6 | L_H<34  ) %>% 
  select(., -FEC, -Mean, -Min, -Max)
table(BD1$TIPO)

## 
## Madura  Joven 
##   3150    486

str(BD1)

## tibble [3,636 × 12] (S3: tbl_df/tbl/data.frame)
##  $ EST : Factor w/ 2 levels "Karoll","Vanessa": 2 2 2 2 2 2 2 2 2 2 ...
##  $ DDE : num [1:3636] 22 22 22 22 22 22 22 22 22 22 ...
##  $ M   : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ PL  : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ NH  : num [1:3636] 1 2 3 4 5 6 7 8 9 10 ...
##  $ AF  : num [1:3636] 12.7 88.4 45.7 74.3 79 ...
##  $ L_H : num [1:3636] 6.96 21.68 12.57 22.09 3.04 ...
##  $ A_1F: num [1:3636] 1.63 10.16 7 9.69 1.49 ...
##  $ A_2F: num [1:3636] NA 7.62 6.63 7.42 NA ...
##  $ L_FT: num [1:3636] 4.22 7.27 6.93 6.42 2.32 ...
##  $ A_FT: num [1:3636] 3.4 4.98 3.8 4.93 2.17 ...
##  $ TIPO: Factor w/ 2 levels "Madura","Joven": 2 1 1 1 2 1 1 1 1 1 ...

Analisis exploratorio

Longitud total (cm)

library(ggplot2)
# Longitud total
ggplot(BD1, aes(x=L_H, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Longitud (cm)" )+
    #geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=L_H, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  labs(x="Tipo de hoja",
       y="Longitud (cm)" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
BD1 %>% 
  summarise(.,
            media=mean(L_H),
            mediana=median(L_H),
            desv=sd(L_H),
            max=max(L_H),
            min=min(L_H)
            )

## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  13.3    12.0  5.14  33.9  3.04

#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(L_H),
            mediana=median(L_H),
            desv=sd(L_H),
            )

## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura 13.8    12.6   5.20
## 2 Joven   9.71    9.07  2.80

Ancho total 1F (cm)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=A_1F, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Ancho (cm)" )+
    #geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=A_1F, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  labs(x="Tipo de hoja",
       y="Ancho (cm)" )+
  xlim(c("Madura", "Joven"))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
BD1 %>% 
  summarise(.,
            media=mean(A_1F),
            mediana=median(A_1F),
            desv=sd(A_1F),
            max=max(A_1F),
            min=min(A_1F)
            )

## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  7.77    7.33  3.21  21.3  1.06

#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(A_1F),
            mediana=median(A_1F),
            desv=sd(A_1F),
            )

## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  8.27    7.79  3.05
## 2 Joven   4.50    4.22  2.11

Ancho total 2F (cm)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=A_2F, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Ancho (cm)" )+
    #geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

## Warning: Removed 486 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=A_2F, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
    labs(x="Tipo de hoja",
       y="Ancho (cm)" )+
  xlim(c("Madura"))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).

#
BD1 %>% 
  summarise(.,
            media=mean(A_2F),
            mediana=median(A_2F),
            desv=sd(A_2F),
            max=max(A_2F),
            min=min(A_2F)
            )

## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1    NA      NA    NA    NA    NA

#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(A_2F),
            mediana=median(A_2F),
            desv=sd(A_2F),
            )

## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  5.67    4.84  3.57
## 2 Joven  NA      NA    NA

Largo del foliolo terminal

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=L_FT, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  theme_bw()+
  labs(y="Conteo",
       x="Largo del foliolo terminal (cm)" )+
  theme(legend.position="top")

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=L_FT, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  theme_bw()+
  labs(x="Tipo de hoja",
       y="Largo del foliolo terminal (cm)" )+
  theme(legend.position="top")

#
BD1 %>% 
  summarise(.,
            media=mean(L_FT),
            mediana=median(L_FT),
            desv=sd(L_FT),
            max=max(L_FT),
            min=min(L_FT)
            )

## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  6.65    6.45  1.60  13.8  1.96

#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(L_FT),
            mediana=median(L_FT),
            desv=sd(L_FT),
            )

## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  6.77    6.59  1.61
## 2 Joven   5.85    5.64  1.33

Ancho del foliolo terminal (cm)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=A_FT, fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  labs(y="Conteo",
       x="Ancho del foliolo terminal (cm)" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=A_FT, fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  labs(x="Tipo de hoja",
       y="Ancho del foliolo terminal (cm)" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
BD1 %>% 
  summarise(.,
            media=mean(A_FT),
            mediana=median(A_FT),
            desv=sd(A_FT),
            max=max(A_FT),
            min=min(A_FT)
            )

## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  4.38    4.25  1.16  10.0  1.76

#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(A_FT),
            mediana=median(A_FT),
            desv=sd(A_FT),
            )

## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  4.43    4.31 1.18 
## 2 Joven   4.08    3.98 0.975

Area foliar (mm2)

library(ggplot2)
# Ancho total 1F
ggplot(BD1, aes(x=AF  , fill=TIPO))+
  geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
  theme_bw()+
  labs(y="Conteo",
       x="Promedio de area foliar por hoja ("~"cm"^2~")" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

# Grafico AFP por edad
ggplot(BD1, aes(x=factor(TIPO), y=AF  , fill=TIPO))+
  geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
  geom_boxplot(alpha=0.8)+
  theme_bw()+
  labs(x="Tipo de hoja",
       y="Promedio de area foliar por hoja ("~"cm"^2~")" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

BD1 %>% 
  summarise(.,
            media=mean(AF),
            mediana=median(AF),
            desv=sd(AF),
            max=max(AF),
            min=min(AF)
            )

## # A tibble: 1 × 5
##   media mediana  desv   max   min
##   <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1  53.0    39.5  41.3  357.  7.13

#
BD1 %>% 
  group_by(., TIPO) %>% 
  summarise(.,
            media=mean(AF),
            mediana=median(AF),
            desv=sd(AF),
            )

## # A tibble: 2 × 4
##   TIPO   media mediana  desv
##   <fct>  <dbl>   <dbl> <dbl>
## 1 Madura  57.3    44.3  42.4
## 2 Joven   25.2    21.8  13.0

Como cambia el AF y el NH con el tiempo

library(dplyr)
###
AF_T <- BD1 %>% 
  group_by(DDE, PL) %>% 
  summarise(S_AF=sum(AF)) %>% 
  ungroup()

## `summarise()` has grouped output by 'DDE'. You can override using the `.groups`
## argument.

ggplot(AF_T, aes(x=factor(DDE), y=S_AF))+
  geom_boxplot()+
  labs(x="Dias despues de emergencia",
       y="Area foliar por planta ("~"cm"^2~")" )+
  ylim(0, 12000)+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

Como cambia el NH con el tiempo

#
NH_T <- BD1 %>% 
  group_by(TIPO, DDE) %>% 
  summarise(S_NH= ( n()/7) ) %>% 
  ungroup()

## `summarise()` has grouped output by 'TIPO'. You can override using the
## `.groups` argument.

ggplot(NH_T, aes(x=factor(DDE), y=S_NH, fill=TIPO ))+
  geom_bar(position="dodge",colour="black", stat = "identity")+
  theme_bw()+
  labs(x="Días después de emergencia",
       y="Promedio del número de hojas por planta" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

#
ggplot(NH_T, aes(x=factor(DDE), y=S_NH, fill=TIPO ))+
  geom_bar(position="identity",colour="black", stat = "identity")+
  theme_bw()+
  labs(x="Días después de emergencia",
       y="Promedio del número de hojas por planta" )+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(patchwork) 
library(hrbrthemes)

## Warning: package 'hrbrthemes' was built under R version 4.4.3

library(readxl)
#
DolarColor <- "#69b3a2" #Color para dolar
EurColor <- "#CD6155" # Color para Euro
#
NH_T2 <- NH_T %>%  pivot_wider(., names_from = TIPO, values_from = S_NH)
#
library(dplyr)
#
AF_NH <- AF_T %>%
  group_by(., DDE) %>%
  summarise(., AF=mean(S_AF)) %>%
  inner_join(NH_T, by="DDE")
AF_NH

## # A tibble: 14 × 4
##      DDE    AF TIPO     S_NH
##    <dbl> <dbl> <fct>   <dbl>
##  1    22  866. Madura  17.4 
##  2    22  866. Joven    5   
##  3    31 1708. Madura  26.1 
##  4    31 1708. Joven    3.14
##  5    43 3410. Madura  46.6 
##  6    43 3410. Joven   21.1 
##  7    52 3099. Madura  46.3 
##  8    52 3099. Joven   12.6 
##  9    62 5105. Madura  82.4 
## 10    62 5105. Joven    8.43
## 11    73 7774. Madura 127.  
## 12    73 7774. Joven    9.29
## 13    82 5568. Madura 104.  
## 14    82 5568. Joven    9.86

#
coeff <- 0.02 # Coeficiente graduado para que las unidades de ambos ejes coincidan
# Por ello se utilizo para ajustar las unidades de la variable y secundaria como su eje
ggplot(AF_NH, aes(x=DDE, fill=TIPO))+
  geom_bar(aes(y=S_NH/coeff, fill=TIPO), stat = "identity", color="black")+
  geom_line(aes(y=AF), size=0.8, color="black")+
  scale_y_continuous(name="Área foliar promedio por planta ("~"cm"^2~")",
                     limits = c(0, 8000),
                     sec.axis = sec_axis(~.*coeff, name = "Número de hojas"))+
  scale_x_continuous(name="Días despues de emergencia",
                     breaks=seq(0, 90, 10))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Análisis de correlaciones

library(corrplot)

## corrplot 0.94 loaded

# Todas
cor1s <- cor(BD1[, 6:11], method = "spearman", use = "pairwise.complete.obs")
corrplot(cor1s, method="number")

##### "everything", "all.obs", "complete.obs", "na.or.complete", or "pairwise.complete.obs".
# Hojas jovenes
BD_hj <- BD1 %>% filter(., TIPO=="Joven") %>% select(., !c(A_2F) )
BD_hj

## # A tibble: 486 × 11
##    EST       DDE M     PL       NH    AF   L_H  A_1F  L_FT  A_FT TIPO 
##    <fct>   <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>
##  1 Vanessa    22 1     1         1 12.7   6.96  1.63  4.22  3.4  Joven
##  2 Vanessa    22 1     1         5 79.0   3.04  1.49  2.32  2.17 Joven
##  3 Vanessa    22 1     1        13 13.1   6.18  3.73  3.51  3.45 Joven
##  4 Vanessa    22 1     1        14  9.43  6     2.77  3.36  2.57 Joven
##  5 Vanessa    22 1     3         1 25.6  11.9   2.48  5.33  4.60 Joven
##  6 Vanessa    22 1     3         4 27.4  11.3   2.05  7.74  5.61 Joven
##  7 Vanessa    22 1     3         9 17.6   9.66  1.76  5.36  3.90 Joven
##  8 Vanessa    22 1     3        10 19.9   9.63  4.12  5.44  3.76 Joven
##  9 Vanessa    22 1     3        11 10.4   8.04  1.13  4.00  2.70 Joven
## 10 Vanessa    22 1     4         8 32.7  15.5   2.13  7.01  5.95 Joven
## # ℹ 476 more rows

cor_hj <- cor(BD_hj[, 6:10], method = "spearman", use="pairwise.complete.obs")
corrplot(cor_hj, method="number")

# Hojas maduras
BD_ma <- BD1 %>% filter(., TIPO=="Madura")
BD_ma

## # A tibble: 3,150 × 12
##    EST       DDE M     PL       NH    AF   L_H  A_1F  A_2F  L_FT  A_FT TIPO  
##    <fct>   <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> 
##  1 Vanessa    22 1     1         2  88.4 21.7  10.2   7.62  7.27  4.98 Madura
##  2 Vanessa    22 1     1         3  45.7 12.6   7.00  6.63  6.93  3.80 Madura
##  3 Vanessa    22 1     1         4  74.3 22.1   9.68  7.42  6.42  4.93 Madura
##  4 Vanessa    22 1     1         6  94.9 17.4   8.84 10.2   8.10  5.06 Madura
##  5 Vanessa    22 1     1         7  21.2  6.94  4.44  5.65  2.94  1.95 Madura
##  6 Vanessa    22 1     1         8  44.0 11.8   6.77  5.07  4.88  4.06 Madura
##  7 Vanessa    22 1     1         9  67.6 14.3   9.89  8.53  7.5   4.29 Madura
##  8 Vanessa    22 1     1        10  68.4 14.4  10.2   8.65  7.35  4.23 Madura
##  9 Vanessa    22 1     1        11  97.9 18.9  10.5  11.8   5.83  5.43 Madura
## 10 Vanessa    22 1     1        12  60.0 11.7   9.18  9     5.14  4.07 Madura
## # ℹ 3,140 more rows

cor_ma <- cor(BD_ma[, 6:11], method = "spearman", use = "pairwise.complete.obs")
corrplot(cor_ma, method="number")

# Division de BD

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Cargando paquete requerido: lattice

## 
## Adjuntando el paquete: 'caret'

## The following object is masked from 'package:purrr':
## 
##     lift

library(dplyr)
set.seed(123) # Para reproducibilidad

# Dividir el conjunto de datos en grupos de entrenamiento y validación
## Hojas jovenes
indices_entrenamiento_hj <- createDataPartition(BD_hj$AF, p = 0.8, list = FALSE)
entren_hj <- BD_hj[indices_entrenamiento_hj, ]
valid_hj <- BD_hj[-indices_entrenamiento_hj, ]

## Hojas maduras
indices_entrenamiento_ma <- createDataPartition(BD_ma$AF, p = 0.8, list = FALSE)
entren_ma <- BD_ma[indices_entrenamiento_ma, ]
valid_ma <- BD_ma[-indices_entrenamiento_ma, ]

Planteamiento de modelos

1. AF vs L_H

library(ggpubr)
## Hojas jovenes
R.L_H.hj <- lm(formula = AF~L_H, data = entren_hj)
summary(R.L_H.hj)

## 
## Call:
## lm(formula = AF ~ L_H, data = entren_hj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.021  -2.980  -0.218   2.340  38.966 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -15.0833     1.0214  -14.77   <2e-16 ***
## L_H           4.1342     0.1011   40.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.624 on 388 degrees of freedom
## Multiple R-squared:  0.8116, Adjusted R-squared:  0.8111 
## F-statistic:  1672 on 1 and 388 DF,  p-value: < 2.2e-16

## Hojas maduras
R.L_H.ma <- lm(formula = AF~L_H, data = entren_ma)
summary(R.L_H.ma)

## 
## Call:
## lm(formula = AF ~ L_H, data = entren_ma)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.578  -7.242   0.730   7.070 147.197 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -47.91433    0.82602  -58.01   <2e-16 ***
## L_H           7.61510    0.05593  136.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.61 on 2520 degrees of freedom
## Multiple R-squared:  0.8803, Adjusted R-squared:  0.8803 
## F-statistic: 1.854e+04 on 1 and 2520 DF,  p-value: < 2.2e-16

Supuestos del modelo

# Hojas jovenes
# 1. Test de linealidad
cor.test(entren_hj$AF, entren_hj$L_H)

## 
##  Pearson's product-moment correlation
## 
## data:  entren_hj$AF and entren_hj$L_H
## t = 40.888, df = 388, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8803632 0.9180757
## sample estimates:
##       cor 
## 0.9009064

# 2. Test de heterocedasticidad
plot(fitted(R.L_H.hj), resid(R.L_H.hj), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

# Podemos ver en la gr?fica que los residuos exhiben una forma de "cono" 
## no est?n distribuidos con la misma varianza en toda la gr?fica.
plot(R.L_H.hj, 3) # Revisar la horizontalidad de la linea roja

# perform Breusch-Pagan test
library(lmtest)

## Cargando paquete requerido: zoo

## 
## Adjuntando el paquete: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

bptest(R.L_H.hj)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.L_H.hj
## BP = 33.2, df = 1, p-value = 8.317e-09

library(car)

## Warning: package 'car' was built under R version 4.4.3

## Cargando paquete requerido: carData

## 
## Adjuntando el paquete: 'car'

## The following object is masked from 'package:purrr':
## 
##     some

## The following object is masked from 'package:dplyr':
## 
##     recode

ncvTest(R.L_H.hj) # Since the p-value from the test is <0.05 we will reject the null hypothesis

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 205.5325, Df = 1, p = < 2.22e-16

# and conclude that heteroscedasticity is a problem in this model
# 3. Normalidad de residuos
plot(R.L_H.hj, 2) # segun la imagen si siguen una distribucion

hist(R.L_H.hj$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.L_H.hj$residuals) # No siguen una distribucion normal

## 
##  Shapiro-Wilk normality test
## 
## data:  R.L_H.hj$residuals
## W = 0.86611, p-value < 2.2e-16

#LillieTest(R.L_H.hj$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.L_H.hj) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.L_H.hj, 4) # Cuales casos tienen mayor distancia de cook

plot(R.L_H.hj, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_hj$cook.L_H <- cooks.distance(R.L_H.hj)
which(entren_hj$cook.L_H>1) # no se detectan valores influyentes para la prueba de cook

## named integer(0)

# Hojas maduras
# 1. Test de linealidad
cor.test(entren_ma$AF, entren_ma$L_H)

## 
##  Pearson's product-moment correlation
## 
## data:  entren_ma$AF and entren_ma$L_H
## t = 136.15, df = 2520, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9334086 0.9427632
## sample estimates:
##       cor 
## 0.9382572

# 2. Test de heterocedasticidad
plot(fitted(R.L_H.ma), resid(R.L_H.ma), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

plot(R.L_H.ma, 3) # Revisar la horizontalidad de la linea roja

library(lmtest); bptest(R.L_H.ma)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.L_H.ma
## BP = 422.11, df = 1, p-value < 2.2e-16

library(car); ncvTest(R.L_H.ma)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 2385.598, Df = 1, p = < 2.22e-16

# 3. Normalidad de residuos
plot(R.L_H.ma, 2) # segun la imagen si siguen una distribucion

hist(R.L_H.ma$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.L_H.ma$residuals) # No siguen una distribucion normal

## 
##  Shapiro-Wilk normality test
## 
## data:  R.L_H.ma$residuals
## W = 0.90322, p-value < 2.2e-16

#LillieTest(R.L_H.ma$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.L_H.ma) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.L_H.ma, 4) # Cuales casos tienen mayor distancia de cook

plot(R.L_H.ma, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_ma$cook.L_H <- cooks.distance(R.L_H.ma)
which(entren_ma$cook.L_H>1) # no se detectan valores influyentes para la prueba de cook

## named integer(0)

Ajuste por MCP

# Hojas jovenes
w.L_H.hj <- 1 / lm(abs(R.L_H.hj$residuals) ~ R.L_H.hj$fitted.values)$fitted.values^2
R.MCP.L_H.hj <- lm(AF ~ L_H, data = entren_hj, weights=w.L_H.hj)
summary(R.MCP.L_H.hj)

## 
## Call:
## lm(formula = AF ~ L_H, data = entren_hj, weights = w.L_H.hj)
## 
## Weighted Residuals:
##    Min     1Q Median     3Q    Max 
## -4.853 -1.065 -0.181  0.725 48.151 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -4.9439     0.6334  -7.805 5.59e-14 ***
## L_H           3.0027     0.1142  26.289  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.903 on 388 degrees of freedom
## Multiple R-squared:  0.6405, Adjusted R-squared:  0.6395 
## F-statistic: 691.1 on 1 and 388 DF,  p-value: < 2.2e-16

library(DescTools); #VIF(R.MCP.L_H.hj)

## 
## Adjuntando el paquete: 'DescTools'

## The following object is masked from 'package:car':
## 
##     Recode

## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE

plot(R.MCP.L_H.hj, 3)

library(lmtest); bptest(R.MCP.L_H.hj)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.L_H.hj
## BP = 0.7767, df = 1, p-value = 0.3782

library(car); ncvTest(R.MCP.L_H.hj)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 349.75, Df = 1, p = < 2.22e-16

hist(R.MCP.L_H.hj$residuals)

plot(R.MCP.L_H.hj, 2)

shapiro.test(R.MCP.L_H.hj$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.L_H.hj$residuals
## W = 0.78863, p-value < 2.2e-16

#LillieTest(R.MCP.L_H.hj$residuals)
plot(R.MCP.L_H.hj, 4)

# Hojas maduras
w.L_H.ma <- 1 / lm(abs(R.L_H.ma$residuals) ~ R.L_H.ma$fitted.values)$fitted.values^2
R.MCP.L_H.ma <- lm(AF ~ L_H, data = entren_ma, weights=w.L_H.ma)
summary(R.MCP.L_H.ma)

## 
## Call:
## lm(formula = AF ~ L_H, data = entren_ma, weights = w.L_H.ma)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6038 -0.7432 -0.0932  0.6110 18.7334 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -25.08530    0.42918  -58.45   <2e-16 ***
## L_H           5.76220    0.04503  127.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.202 on 2520 degrees of freedom
## Multiple R-squared:  0.8666, Adjusted R-squared:  0.8666 
## F-statistic: 1.637e+04 on 1 and 2520 DF,  p-value: < 2.2e-16

library(DescTools); #VIF(R.MCP.L_H.ma)
plot(R.MCP.L_H.ma, 3)

library(lmtest); bptest(R.MCP.L_H.ma)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.L_H.ma
## BP = 0.55299, df = 1, p-value = 0.4571

library(car); ncvTest(R.MCP.L_H.ma)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 166.9895, Df = 1, p = < 2.22e-16

hist(R.MCP.L_H.ma$residuals)

plot(R.MCP.L_H.ma, 2)

shapiro.test(R.MCP.L_H.ma$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.L_H.ma$residuals
## W = 0.75092, p-value < 2.2e-16

#LillieTest(R.MCP.L_H.ma$residuals)
plot(R.MCP.L_H.ma, 4)

### Predicciones sobre el modelo

# Hojas jovenes
predic_hj <- predict(R.L_H.hj, newdata = valid_hj)
# Hojas maduras
predic_ma <- predict(R.L_H.ma, newdata = valid_ma)

Métricas de evaluación

# Hojas jovenes
rmse_hj <- sqrt(mean((valid_hj$AF - predic_hj)^2)) # Raíz del error cuadrático medio
rmse_hj

## [1] 9.525282

mae_hj <- mean(abs(valid_hj$AF - predic_hj)) # Error absoluto medio
mae_hj

## [1] 4.076445

r2_hj <- cor(valid_hj$AF, predic_hj)^2 # Coeficiente de determinación (R cuadrado)
r2_hj

## [1] 0.513131

# Hojas maduras
rmse_ma <- sqrt(mean((valid_ma$AF - predic_ma)^2)) # Raíz del error cuadrático medio
rmse_ma

## [1] 15.02574

mae_ma <- mean(abs(valid_ma$AF - predic_ma)) # Error absoluto medio
mae_ma

## [1] 9.970014

r2_ma <- cor(valid_ma$AF, predic_ma)^2 # Coeficiente de determinación (R cuadrado)
r2_ma

## [1] 0.8799063

2. AF vs A_1F

library(ggpubr)
## Hojas jovenes
R.A_1F.hj <- lm(formula = AF~A_1F, data = entren_hj)
summary(R.A_1F.hj)

## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_hj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.136  -7.152  -2.198   4.676  64.603 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.1620     1.2511   7.323 1.41e-12 ***
## A_1F          3.5094     0.2502  14.024  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.56 on 388 degrees of freedom
## Multiple R-squared:  0.3364, Adjusted R-squared:  0.3347 
## F-statistic: 196.7 on 1 and 388 DF,  p-value: < 2.2e-16

summary(R.A_1F.hj)$sigma

## [1] 10.55638

## Hojas maduras
R.A_1F.ma <- lm(formula = AF~A_1F, data = entren_ma)
summary(R.A_1F.ma)

## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_ma)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.103 -11.724  -4.026   6.784 142.733 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -42.6878     1.1547  -36.97   <2e-16 ***
## A_1F         12.0692     0.1306   92.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.16 on 2520 degrees of freedom
## Multiple R-squared:  0.7721, Adjusted R-squared:  0.772 
## F-statistic:  8537 on 1 and 2520 DF,  p-value: < 2.2e-16

summary(R.A_1F.ma)$sigma

## [1] 20.16261

Supuestos del modelo

# Hojas jovenes
# 1. Test de linealidad
cor.test(entren_hj$AF, entren_hj$A_1F)

## 
##  Pearson's product-moment correlation
## 
## data:  entren_hj$AF and entren_hj$A_1F
## t = 14.024, df = 388, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5100440 0.6422831
## sample estimates:
##       cor 
## 0.5799715

# 2. Test de heterocedasticidad
plot(fitted(R.A_1F.hj), resid(R.A_1F.hj), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

# Podemos ver en la gr?fica que los residuos exhiben una forma de "cono" 
## no est?n distribuidos con la misma varianza en toda la gr?fica.
plot(R.A_1F.hj, 3) # Revisar la horizontalidad de la linea roja

# perform Breusch-Pagan test
library(lmtest)
bptest(R.A_1F.hj)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.A_1F.hj
## BP = 20.429, df = 1, p-value = 6.189e-06

library(car)
ncvTest(R.A_1F.hj) # Since the p-value from the test is <0.05 we will reject the null hypothesis

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 70.93415, Df = 1, p = < 2.22e-16

# and conclude that heteroscedasticity is a problem in this model
# 3. Normalidad de residuos
plot(R.A_1F.hj, 2) # segun la imagen si siguen una distribucion

hist(R.A_1F.hj$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.A_1F.hj$residuals) # No siguen una distribucion normal

## 
##  Shapiro-Wilk normality test
## 
## data:  R.A_1F.hj$residuals
## W = 0.89298, p-value = 6.932e-16

#LillieTest(R.A_1F.hj$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.A_1F.hj) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.A_1F.hj, 4) # Cuales casos tienen mayor distancia de cook

plot(R.A_1F.hj, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_hj$cook.A_1F <- cooks.distance(R.A_1F.hj)
which(entren_hj$cook.A_1F>1) # no se detectan valores influyentes para la prueba de cook

## named integer(0)

# Hojas maduras
# 1. Test de linealidad
cor.test(entren_ma$AF, entren_ma$A_1F)

## 
##  Pearson's product-moment correlation
## 
## data:  entren_ma$AF and entren_ma$A_1F
## t = 92.398, df = 2520, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8694810 0.8872924
## sample estimates:
##       cor 
## 0.8786921

# 2. Test de heterocedasticidad
plot(fitted(R.A_1F.ma), resid(R.A_1F.ma), xlab='Fitted Values', ylab='Residuals'); abline(0,0)

plot(R.A_1F.ma, 3) # Revisar la horizontalidad de la linea roja

library(lmtest); bptest(R.A_1F.ma)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.A_1F.ma
## BP = 229.37, df = 1, p-value < 2.2e-16

library(car); ncvTest(R.A_1F.ma)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 993.4437, Df = 1, p = < 2.22e-16

# 3. Normalidad de residuos
plot(R.A_1F.ma, 2) # segun la imagen si siguen una distribucion

hist(R.A_1F.ma$residuals) # el histograma muetra una distribucion normal

shapiro.test(R.A_1F.ma$residuals) # No siguen una distribucion normal

## 
##  Shapiro-Wilk normality test
## 
## data:  R.A_1F.ma$residuals
## W = 0.86529, p-value < 2.2e-16

#LillieTest(R.A_1F.ma$residuals) # normales al 1%
# 4. Test de multicolinealidad (SI SE TIENENMAS DE 2 VARIABLES DEPENDIENTES EN EL MODELO)
#library(DescTools)
#VIF(R.A_1F.ma) # se buscan valores inferiores a 4 o 5
# 5. Test para valores influyentes
plot(R.A_1F.ma, 4) # Cuales casos tienen mayor distancia de cook

plot(R.A_1F.ma, 5) # Como ningun valor es mavor a 1.0, se estima que no hay valores influyentes

entren_ma$cook.A_1F <- cooks.distance(R.A_1F.ma)
which(entren_ma$cook.A_1F>1) # no se detectan valores influyentes para la prueba de cook

## named integer(0)

Ajuste por MCP

# Hojas jovenes
w.A_1F.hj <- 1 / lm(abs(R.A_1F.hj$residuals) ~ R.A_1F.hj$fitted.values)$fitted.values^2
R.MCP.A_1F.hj <- lm(AF ~ A_1F, data = entren_hj, weights=w.A_1F.hj)
summary(R.MCP.A_1F.hj)

## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_hj, weights = w.A_1F.hj)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0862 -0.9530 -0.3101  0.6311  6.9013 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  12.2297     1.1488   10.65   <2e-16 ***
## A_1F          2.7924     0.2789   10.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.352 on 388 degrees of freedom
## Multiple R-squared:  0.2053, Adjusted R-squared:  0.2033 
## F-statistic: 100.2 on 1 and 388 DF,  p-value: < 2.2e-16

library(DescTools); #VIF(R.MCP.A_1F.hj)
plot(R.MCP.A_1F.hj, 3)

library(lmtest); bptest(R.MCP.A_1F.hj)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.A_1F.hj
## BP = 0.65768, df = 1, p-value = 0.4174

library(car); ncvTest(R.MCP.A_1F.hj)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.7695445, Df = 1, p = 0.38036

hist(R.MCP.A_1F.hj$residuals)

plot(R.MCP.A_1F.hj, 2)

shapiro.test(R.MCP.A_1F.hj$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.A_1F.hj$residuals
## W = 0.87434, p-value < 2.2e-16

#LillieTest(R.MCP.A_1F.hj$residuals)
plot(R.MCP.A_1F.hj, 4)

# Hojas maduras
w.A_1F.ma <- 1 / lm(abs(R.A_1F.ma$residuals) ~ R.A_1F.ma$fitted.values)$fitted.values^2
R.MCP.A_1F.ma <- lm(AF ~ A_1F, data = entren_ma, weights=w.A_1F.ma)
summary(R.MCP.A_1F.ma)

## 
## Call:
## lm(formula = AF ~ A_1F, data = entren_ma, weights = w.A_1F.ma)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1383 -0.9258 -0.3760  0.4752 18.7645 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -12.6168     0.7514  -16.79   <2e-16 ***
## A_1F          8.1310     0.1165   69.80   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.424 on 2520 degrees of freedom
## Multiple R-squared:  0.6591, Adjusted R-squared:  0.659 
## F-statistic:  4872 on 1 and 2520 DF,  p-value: < 2.2e-16

library(DescTools); #VIF(R.MCP.A_1F.ma)
plot(R.MCP.A_1F.ma, 3)

library(lmtest); bptest(R.MCP.A_1F.ma)

## 
##  studentized Breusch-Pagan test
## 
## data:  R.MCP.A_1F.ma
## BP = 0.5787, df = 1, p-value = 0.4468

library(car); ncvTest(R.MCP.A_1F.ma)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 13.1759, Df = 1, p = 0.00028357

hist(R.MCP.A_1F.ma$residuals)

plot(R.MCP.A_1F.ma, 2)

shapiro.test(R.MCP.A_1F.ma$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  R.MCP.A_1F.ma$residuals
## W = 0.72561, p-value < 2.2e-16

#LillieTest(R.MCP.A_1F.ma$residuals)
plot(R.MCP.A_1F.ma, 4)

### Predicciones sobre el modelo

# Hojas jovenes
predic_hj <- predict(R.A_1F.hj, newdata = valid_hj)
# Hojas maduras
predic_ma <- predict(R.A_1F.ma, newdata = valid_ma)

Métricas de evaluación

# Hojas jovenes
rmse_hj <- sqrt(mean((valid_hj$AF - predic_hj)^2)) # Raíz del error cuadrático medio
rmse_hj

## [1] 11.77434

mae_hj <- mean(abs(valid_hj$AF - predic_hj)) # Error absoluto medio
mae_hj

## [1] 8.307776

r2_hj <- cor(valid_hj$AF, predic_hj)^2 # Coeficiente de determinación (R cuadrado)
r2_hj

## [1] 0.2367429

# Hojas maduras
rmse_ma <- sqrt(mean((valid_ma$AF - predic_ma)^2)) # Raíz del error cuadrático medio
rmse_ma

## [1] 22.00266

mae_ma <- mean(abs(valid_ma$AF - predic_ma)) # Error absoluto medio
mae_ma

## [1] 14.35855

r2_ma <- cor(valid_ma$AF, predic_ma)^2 # Coeficiente de determinación (R cuadrado)
r2_ma

## [1] 0.7428278

3. AF vs LH + A_1F

## Hojas jovenes
R.LH.A_1F.hj <- lm(formula = AF ~ L_H + A_1F, data = entren_hj)
summary(R.LH.A_1F.hj)

## 
## Call:
## lm(formula = AF ~ L_H + A_1F, data = entren_hj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.729  -2.439  -0.100   2.128  37.321 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -16.1619     0.9568 -16.892  < 2e-16 ***
## L_H           3.7178     0.1072  34.676  < 2e-16 ***
## A_1F          1.1323     0.1414   8.009 1.37e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.216 on 387 degrees of freedom
## Multiple R-squared:  0.8384, Adjusted R-squared:  0.8376 
## F-statistic:  1004 on 2 and 387 DF,  p-value: < 2.2e-16

summary(R.LH.A_1F.hj)$sigma

## [1] 5.215678

## Hojas maduras
R.LH.A_1F.ma <- lm(formula = AF ~ L_H + A_1F, data = entren_ma)
summary(R.LH.A_1F.ma)

## 
## Call:
## lm(formula = AF ~ L_H + A_1F, data = entren_ma)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.755  -7.184  -0.363   6.002 135.861 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -53.0325     0.7773  -68.23   <2e-16 ***
## L_H           5.6387     0.0974   57.89   <2e-16 ***
## A_1F          3.9137     0.1648   23.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.21 on 2519 degrees of freedom
## Multiple R-squared:  0.9022, Adjusted R-squared:  0.9021 
## F-statistic: 1.162e+04 on 2 and 2519 DF,  p-value: < 2.2e-16

summary(R.LH.A_1F.ma)$sigma

## [1] 13.20998

4. AF vs LH Pendientes paralelas

#
R.PP.LH <- lm(formula = AF ~ L_H + TIPO + 0, data = BD1)
summary(R.PP.LH)

## 
## Call:
## lm(formula = AF ~ L_H + TIPO + 0, data = BD1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.732  -7.080   0.715   6.603 149.437 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## L_H          7.49916    0.04814  155.79   <2e-16 ***
## TIPOMadura -46.22588    0.71205  -64.92   <2e-16 ***
## TIPOJoven  -47.68527    0.80172  -59.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.36 on 3633 degrees of freedom
## Multiple R-squared:  0.9543, Adjusted R-squared:  0.9543 
## F-statistic: 2.531e+04 on 3 and 3633 DF,  p-value: < 2.2e-16

summary(R.PP.LH)$sigma

## [1] 14.35716

#
plot(y=BD1$AF,
     x=BD1$L_H,
     pch=19,
     col=factor(BD1$TIPO))
abline(-46.22588, 7.49916, col="black", lty=2)
abline(-47.68527, 7.49916, col="red", lty=2)

#
ggplot(BD1, aes(x = L_H, y = AF, color = factor(TIPO))) +
  geom_point(size = 2, alpha=0.2) +
  geom_abline(intercept = -46.22588, slope = 7.49916, linetype = "solid", color = "#1b9e77") +
  geom_abline(intercept = -47.68527, slope = 7.49916, linetype = "solid", color = "#d95f02") +
  labs(x = "L_H", y = "AF", color = "TIPO") +
  xlab("Longitud de la hoja (cm)")+
  ylab("Promedio de area foliar por hoja ("~"cm"^2~")")+
  xlim(c(0,32))+
  ylim(c(0,200))+
  theme_bw()+
  theme(
  legend.position="top",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_point()`).

Correlograma

library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

BD1

## # A tibble: 3,636 × 12
##    EST       DDE M     PL       NH    AF   L_H  A_1F  A_2F  L_FT  A_FT TIPO  
##    <fct>   <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> 
##  1 Vanessa    22 1     1         1  12.7  6.96  1.63 NA     4.22  3.4  Joven 
##  2 Vanessa    22 1     1         2  88.4 21.7  10.2   7.62  7.27  4.98 Madura
##  3 Vanessa    22 1     1         3  45.7 12.6   7.00  6.63  6.93  3.80 Madura
##  4 Vanessa    22 1     1         4  74.3 22.1   9.68  7.42  6.42  4.93 Madura
##  5 Vanessa    22 1     1         5  79.0  3.04  1.49 NA     2.32  2.17 Joven 
##  6 Vanessa    22 1     1         6  94.9 17.4   8.84 10.2   8.10  5.06 Madura
##  7 Vanessa    22 1     1         7  21.2  6.94  4.44  5.65  2.94  1.95 Madura
##  8 Vanessa    22 1     1         8  44.0 11.8   6.77  5.07  4.88  4.06 Madura
##  9 Vanessa    22 1     1         9  67.6 14.3   9.89  8.53  7.5   4.29 Madura
## 10 Vanessa    22 1     1        10  68.4 14.4  10.2   8.65  7.35  4.23 Madura
## # ℹ 3,626 more rows

A <- ggpairs(BD1, columns=6:11, aes(colour=TIPO, alpha=0.2),
        upper = list(continuous = wrap("cor", size = 2.5),
        lower = list(continuous = "point", size = 0.1, alpha=0.1),
        diag = list(continuous = "densityDiag"),
        xlab=NULL,
        axisLabels = "show") )
A + theme(
  panel.background = element_blank(),
  strip.background = element_blank(),
  panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values

## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 486 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values

## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).

Analisis morfometria papa criolla

Pedro Lizarazo

2024-03-20

Cargar base de datos

Imputacion de datos faltantes (Los que no fueron medidos del primer par de foliolos)

Base de datos

Analisis exploratorio

Longitud total (cm)

Ancho total 1F (cm)

Ancho total 2F (cm)

Largo del foliolo terminal

Ancho del foliolo terminal (cm)

Area foliar (mm2)

Como cambia el AF y el NH con el tiempo

Como cambia el NH con el tiempo

Planteamiento de modelos

1. AF vs L_H

Supuestos del modelo

Ajuste por MCP

Métricas de evaluación

2. AF vs A_1F

Supuestos del modelo

Ajuste por MCP

Métricas de evaluación

3. AF vs LH + A_1F

4. AF vs LH Pendientes paralelas

Correlograma