library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
prediccion <- filter(datos, is.na(A_1F) )
prediccion
## # A tibble: 62 × 15
## EST DDE M FEC PL NH AF Mean Min Max
## <chr> <dbl> <dbl> <dttm> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 Vanessa 22 1 2023-10-25 00:00:00 4 29 13.9 84827 16 215
## 2 Vanessa 22 1 2023-10-25 00:00:00 4 32 13.9 84827 16 215
## 3 Vanessa 22 1 2023-10-25 00:00:00 4 40 20.3 82035 14 171
## 4 Vanessa 22 1 2023-10-25 00:00:00 5 3 15.4 109384 15 216
## 5 Vanessa 22 1 2023-10-25 00:00:00 5 17 10.6 97743 11 213
## 6 Vanessa 22 1 2023-10-25 00:00:00 7 1 7.32 84448 7 169
## 7 Vanessa 22 1 2023-10-25 00:00:00 7 2 8.97 90203 11 185
## 8 Vanessa 22 1 2023-10-25 00:00:00 7 4 7.13 98006 16 180
## 9 Vanessa 22 1 2023-10-25 00:00:00 7 5 7.13 98006 16 180
## 10 Vanessa 22 1 2023-10-25 00:00:00 7 7 15.6 93622 16 179
## # ℹ 52 more rows
## # ℹ 5 more variables: L_H <dbl>, A_1F <dbl>, A_2F <dbl>, L_FT <dbl>, A_FT <dbl>
entrenamiento <- filter(datos, !is.na(A_1F) )
entrenamiento
## # A tibble: 3,574 × 15
## EST DDE M FEC PL NH AF Mean Min Max
## <chr> <dbl> <dbl> <dttm> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 Vanessa 22 1 2023-10-25 00:00:00 1 1 12.7 64,767… 9.00 129.…
## 2 Vanessa 22 1 2023-10-25 00:00:00 1 2 88.4 80853 2 194
## 3 Vanessa 22 1 2023-10-25 00:00:00 1 3 45.7 76323 2 162
## 4 Vanessa 22 1 2023-10-25 00:00:00 1 4 74.3 90186 2 194
## 5 Vanessa 22 1 2023-10-25 00:00:00 1 5 79.0 93819 2 191
## 6 Vanessa 22 1 2023-10-25 00:00:00 1 6 94.9 67920 1 166
## 7 Vanessa 22 1 2023-10-25 00:00:00 1 7 21.2 81335 2 178
## 8 Vanessa 22 1 2023-10-25 00:00:00 1 8 44.0 83142 3 185
## 9 Vanessa 22 1 2023-10-25 00:00:00 1 9 67.6 92057 9 204
## 10 Vanessa 22 1 2023-10-25 00:00:00 1 10 68.4 91681 9 204
## # ℹ 3,564 more rows
## # ℹ 5 more variables: L_H <dbl>, A_1F <dbl>, A_2F <dbl>, L_FT <dbl>, A_FT <dbl>
#
modelo1 <- lm(A_1F~AF, entrenamiento)
summary(modelo1)
##
## Call:
## lm(formula = A_1F ~ AF, data = entrenamiento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3889 -0.8112 0.1957 1.0289 6.9064
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2460617 0.0447367 94.91 <2e-16 ***
## AF 0.0667492 0.0006609 100.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.634 on 3572 degrees of freedom
## Multiple R-squared: 0.7406, Adjusted R-squared: 0.7406
## F-statistic: 1.02e+04 on 1 and 3572 DF, p-value: < 2.2e-16
#
modelo2 <- lm(A_1F~L_H, entrenamiento)
summary(modelo2)
##
## Call:
## lm(formula = A_1F ~ L_H, data = entrenamiento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.2168 -0.8850 0.2261 1.0899 6.1185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.803081 0.080214 10.01 <2e-16 ***
## L_H 0.526351 0.005612 93.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.724 on 3572 degrees of freedom
## Multiple R-squared: 0.7112, Adjusted R-squared: 0.7111
## F-statistic: 8796 on 1 and 3572 DF, p-value: < 2.2e-16
#
modelo3 <- lm(A_1F~L_FT, entrenamiento)
summary(modelo3)
##
## Call:
## lm(formula = A_1F ~ L_FT, data = entrenamiento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.4612 -1.0227 0.1418 1.2171 8.7119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.37097 0.14890 -15.92 <2e-16 ***
## L_FT 1.52970 0.02173 70.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.076 on 3572 degrees of freedom
## Multiple R-squared: 0.5812, Adjusted R-squared: 0.5811
## F-statistic: 4957 on 1 and 3572 DF, p-value: < 2.2e-16
#
modelo4 <- lm(A_1F~A_FT, entrenamiento)
summary(modelo4)
##
## Call:
## lm(formula = A_1F ~ A_FT, data = entrenamiento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.133 -1.167 0.314 1.446 9.194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.81705 0.14687 -5.563 2.85e-08 ***
## A_FT 1.96906 0.03236 60.858 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.247 on 3572 degrees of freedom
## Multiple R-squared: 0.509, Adjusted R-squared: 0.5089
## F-statistic: 3704 on 1 and 3572 DF, p-value: < 2.2e-16
#
modelo5 <- lm(A_1F~ AF + L_H + A_FT, entrenamiento)
summary(modelo5)
##
## Call:
## lm(formula = A_1F ~ AF + L_H + A_FT, data = entrenamiento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.1736 -0.7769 0.2309 1.0072 6.3493
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.61767 0.14259 18.358 < 2e-16 ***
## AF 0.04417 0.00186 23.747 < 2e-16 ***
## L_H 0.16837 0.01605 10.492 < 2e-16 ***
## A_FT 0.13507 0.03967 3.405 0.000669 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.596 on 3570 degrees of freedom
## Multiple R-squared: 0.7525, Adjusted R-squared: 0.7523
## F-statistic: 3619 on 3 and 3570 DF, p-value: < 2.2e-16
#
modelo6 <- lm(A_1F~ AF * L_H , entrenamiento)
summary(modelo6)
##
## Call:
## lm(formula = A_1F ~ AF * L_H, data = entrenamiento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0456 -0.6500 0.1877 0.8093 7.0864
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.5342141 0.1049271 14.622 < 2e-16 ***
## AF 0.1601660 0.0035473 45.152 < 2e-16 ***
## L_H 0.0879052 0.0130471 6.738 1.87e-11 ***
## AF:L_H -0.0037939 0.0001044 -36.357 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.366 on 3570 degrees of freedom
## Multiple R-squared: 0.8188, Adjusted R-squared: 0.8187
## F-statistic: 5378 on 3 and 3570 DF, p-value: < 2.2e-16
#
plot(modelo6)
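# Optional comparison (not in the original workflow): since the six candidate models were
# fitted on the same training data, they can also be ranked with an information criterion
# (lower AIC is better).
AIC(modelo1, modelo2, modelo3, modelo4, modelo5, modelo6)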
#
prediccion$A_1F <- round( predict(modelo6, newdata = prediccion), digits = 3)
str(prediccion)
## tibble [62 × 15] (S3: tbl_df/tbl/data.frame)
## $ EST : chr [1:62] "Vanessa" "Vanessa" "Vanessa" "Vanessa" ...
## $ DDE : num [1:62] 22 22 22 22 22 22 22 22 22 22 ...
## $ M : num [1:62] 1 1 1 1 1 1 1 1 1 1 ...
## $ FEC : POSIXct[1:62], format: "2023-10-25" "2023-10-25" ...
## $ PL : num [1:62] 4 4 4 5 5 7 7 7 7 7 ...
## $ NH : num [1:62] 29 32 40 3 17 1 2 4 5 7 ...
## $ AF : num [1:62] 13.9 13.9 20.3 15.4 10.6 ...
## $ Mean: chr [1:62] "84827" "84827" "82035" "109384" ...
## $ Min : chr [1:62] "16" "16" "14" "15" ...
## $ Max : chr [1:62] "215" "215" "171" "216" ...
## $ L_H : num [1:62] 9.34 9.34 9.66 7.71 6.15 ...
## $ A_1F: Named num [1:62] 4.08 4.08 4.89 4.22 3.52 ...
## ..- attr(*, "names")= chr [1:62] "1" "2" "3" "4" ...
## $ A_2F: num [1:62] NA NA NA NA NA NA NA NA NA NA ...
## $ L_FT: num [1:62] 3.94 3.94 5.32 6.22 3.54 ...
## $ A_FT: num [1:62] 4.08 4.08 4.6 3.12 3.42 ...
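# Optional sketch (illustrative, not part of the original script): predict() can also return
# 95% prediction intervals for the imputed A_1F values; 'int_A1F' is a hypothetical name.
int_A1F <- predict(modelo6, newdata = prediccion, interval = "prediction", level = 0.95)
head(int_A1F) # columns fit / lwr / upr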
#
BD <- rbind(entrenamiento, prediccion)
library(dplyr)
BD1 <- BD %>%
mutate(., NH=as.numeric(NH), AF=as.numeric(AF), L_H=as.numeric(L_H),
A_1F=as.numeric(A_1F), A_2F=as.numeric(A_2F),
L_FT=as.numeric(L_FT), A_FT=as.numeric(A_FT)) %>%
mutate(TIPO = case_when(
is.na(A_2F) ~ "Joven",
!is.na(L_H) ~ "Madura"
)) %>%
mutate(
EST = as.factor(EST),
M = as.factor(M),
PL = as.factor(PL),
TIPO = factor(TIPO, levels = c("Madura","Joven") )
) %>%
filter(., L_H>6 | L_H<34 ) %>% # note: with "|" this keeps all rows; use "&" to restrict L_H to (6, 34)
select(., -FEC, -Mean, -Min, -Max)
table(BD1$TIPO)
##
## Madura Joven
## 3150 486
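# Quick consistency check (illustrative, not in the original script): the TIPO label should
# coincide with whether A_2F is missing; 'A2F_missing' is a hypothetical column name.
BD1 %>% count(TIPO, A2F_missing = is.na(A_2F))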
str(BD1)
## tibble [3,636 × 12] (S3: tbl_df/tbl/data.frame)
## $ EST : Factor w/ 2 levels "Karoll","Vanessa": 2 2 2 2 2 2 2 2 2 2 ...
## $ DDE : num [1:3636] 22 22 22 22 22 22 22 22 22 22 ...
## $ M : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PL : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ NH : num [1:3636] 1 2 3 4 5 6 7 8 9 10 ...
## $ AF : num [1:3636] 12.7 88.4 45.7 74.3 79 ...
## $ L_H : num [1:3636] 6.96 21.68 12.57 22.09 3.04 ...
## $ A_1F: num [1:3636] 1.63 10.16 7 9.69 1.49 ...
## $ A_2F: num [1:3636] NA 7.62 6.63 7.42 NA ...
## $ L_FT: num [1:3636] 4.22 7.27 6.93 6.42 2.32 ...
## $ A_FT: num [1:3636] 3.4 4.98 3.8 4.93 2.17 ...
## $ TIPO: Factor w/ 2 levels "Madura","Joven": 2 1 1 1 2 1 1 1 1 1 ...
library(ggplot2)
# Leaf length L_H
ggplot(BD1, aes(x=L_H, fill=TIPO))+
geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
labs(y="Conteo",
x="Longitud (cm)" )+
#geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
# Boxplot of L_H by leaf type
ggplot(BD1, aes(x=factor(TIPO), y=L_H, fill=TIPO))+
geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
geom_boxplot(alpha=0.8)+
labs(x="Tipo de hoja",
y="Longitud (cm)" )+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
#
BD1 %>%
summarise(.,
media=mean(L_H),
mediana=median(L_H),
desv=sd(L_H),
max=max(L_H),
min=min(L_H)
)
## # A tibble: 1 × 5
## media mediana desv max min
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 13.3 12.0 5.14 33.9 3.04
#
BD1 %>%
group_by(., TIPO) %>%
summarise(.,
media=mean(L_H),
mediana=median(L_H),
desv=sd(L_H),
)
## # A tibble: 2 × 4
## TIPO media mediana desv
## <fct> <dbl> <dbl> <dbl>
## 1 Madura 13.8 12.6 5.20
## 2 Joven 9.71 9.07 2.80
library(ggplot2)
# Width A_1F
ggplot(BD1, aes(x=A_1F, fill=TIPO))+
geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
labs(y="Conteo",
x="Ancho (cm)" )+
#geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
# Boxplot of A_1F by leaf type
ggplot(BD1, aes(x=factor(TIPO), y=A_1F, fill=TIPO))+
geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
geom_boxplot(alpha=0.8)+
labs(x="Tipo de hoja",
y="Ancho (cm)" )+
xlim(c("Madura", "Joven"))+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
#
BD1 %>%
summarise(.,
media=mean(A_1F),
mediana=median(A_1F),
desv=sd(A_1F),
max=max(A_1F),
min=min(A_1F)
)
## # A tibble: 1 × 5
## media mediana desv max min
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7.77 7.33 3.21 21.3 1.06
#
BD1 %>%
group_by(., TIPO) %>%
summarise(.,
media=mean(A_1F),
mediana=median(A_1F),
desv=sd(A_1F),
)
## # A tibble: 2 × 4
## TIPO media mediana desv
## <fct> <dbl> <dbl> <dbl>
## 1 Madura 8.27 7.79 3.05
## 2 Joven 4.50 4.22 2.11
library(ggplot2)
# Width A_2F
ggplot(BD1, aes(x=A_2F, fill=TIPO))+
geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
labs(y="Conteo",
x="Ancho (cm)" )+
#geom_vline(data=mu, aes(xintercept=grp.mean, color=ENS), linetype="dashed", size=0.8)+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 486 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Boxplot of A_2F by leaf type
ggplot(BD1, aes(x=factor(TIPO), y=A_2F, fill=TIPO))+
geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
geom_boxplot(alpha=0.8)+
labs(x="Tipo de hoja",
y="Ancho (cm)" )+
xlim(c("Madura"))+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
#
BD1 %>%
summarise(.,
media=mean(A_2F),
mediana=median(A_2F),
desv=sd(A_2F),
max=max(A_2F),
min=min(A_2F)
)
## # A tibble: 1 × 5
## media mediana desv max min
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA NA NA
#
BD1 %>%
group_by(., TIPO) %>%
summarise(.,
media=mean(A_2F),
mediana=median(A_2F),
desv=sd(A_2F),
)
## # A tibble: 2 × 4
## TIPO media mediana desv
## <fct> <dbl> <dbl> <dbl>
## 1 Madura 5.67 4.84 3.57
## 2 Joven NA NA NA
library(ggplot2)
# Terminal leaflet length L_FT
ggplot(BD1, aes(x=L_FT, fill=TIPO))+
geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
theme_bw()+
labs(y="Conteo",
x="Largo del foliolo terminal (cm)" )+
theme(legend.position="top")
# Boxplot of L_FT by leaf type
ggplot(BD1, aes(x=factor(TIPO), y=L_FT, fill=TIPO))+
geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
geom_boxplot(alpha=0.8)+
theme_bw()+
labs(x="Tipo de hoja",
y="Largo del foliolo terminal (cm)" )+
theme(legend.position="top")
#
BD1 %>%
summarise(.,
media=mean(L_FT),
mediana=median(L_FT),
desv=sd(L_FT),
max=max(L_FT),
min=min(L_FT)
)
## # A tibble: 1 × 5
## media mediana desv max min
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 6.65 6.45 1.60 13.8 1.96
#
BD1 %>%
group_by(., TIPO) %>%
summarise(.,
media=mean(L_FT),
mediana=median(L_FT),
desv=sd(L_FT),
)
## # A tibble: 2 × 4
## TIPO media mediana desv
## <fct> <dbl> <dbl> <dbl>
## 1 Madura 6.77 6.59 1.61
## 2 Joven 5.85 5.64 1.33
library(ggplot2)
# Terminal leaflet width A_FT
ggplot(BD1, aes(x=A_FT, fill=TIPO))+
geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
labs(y="Conteo",
x="Ancho del foliolo terminal (cm)" )+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
# Boxplot of A_FT by leaf type
ggplot(BD1, aes(x=factor(TIPO), y=A_FT, fill=TIPO))+
geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
geom_boxplot(alpha=0.8)+
labs(x="Tipo de hoja",
y="Ancho del foliolo terminal (cm)" )+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
#
BD1 %>%
summarise(.,
media=mean(A_FT),
mediana=median(A_FT),
desv=sd(A_FT),
max=max(A_FT),
min=min(A_FT)
)
## # A tibble: 1 × 5
## media mediana desv max min
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4.38 4.25 1.16 10.0 1.76
#
BD1 %>%
group_by(., TIPO) %>%
summarise(.,
media=mean(A_FT),
mediana=median(A_FT),
desv=sd(A_FT),
)
## # A tibble: 2 × 4
## TIPO media mediana desv
## <fct> <dbl> <dbl> <dbl>
## 1 Madura 4.43 4.31 1.18
## 2 Joven 4.08 3.98 0.975
library(ggplot2)
# Mean leaf area per leaf (AF)
ggplot(BD1, aes(x=AF , fill=TIPO))+
geom_histogram(bins = 50, alpha=0.5, position = "identity", color='gray50')+
theme_bw()+
labs(y="Conteo",
x="Promedio de area foliar por hoja ("~"cm"^2~")" )+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
# Boxplot of AF by leaf type
ggplot(BD1, aes(x=factor(TIPO), y=AF , fill=TIPO))+
geom_jitter(aes(color=TIPO), shape=16, position=position_jitter(0.2), alpha=0.2)+
geom_boxplot(alpha=0.8)+
theme_bw()+
labs(x="Tipo de hoja",
y="Promedio de area foliar por hoja ("~"cm"^2~")" )+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
BD1 %>%
summarise(.,
media=mean(AF),
mediana=median(AF),
desv=sd(AF),
max=max(AF),
min=min(AF)
)
## # A tibble: 1 × 5
## media mediana desv max min
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 53.0 39.5 41.3 357. 7.13
#
BD1 %>%
group_by(., TIPO) %>%
summarise(.,
media=mean(AF),
mediana=median(AF),
desv=sd(AF),
)
## # A tibble: 2 × 4
## TIPO media mediana desv
## <fct> <dbl> <dbl> <dbl>
## 1 Madura 57.3 44.3 42.4
## 2 Joven 25.2 21.8 13.0
library(dplyr)
###
AF_T <- BD1 %>%
group_by(DDE, PL) %>%
summarise(S_AF=sum(AF)) %>%
ungroup()
## `summarise()` has grouped output by 'DDE'. You can override using the `.groups`
## argument.
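# Side note (sketch): the grouping message above can be avoided by dropping the groups inside
# summarise() itself; this is equivalent to the AF_T computation above.
AF_T <- BD1 %>%
  group_by(DDE, PL) %>%
  summarise(S_AF = sum(AF), .groups = "drop")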
ggplot(AF_T, aes(x=factor(DDE), y=S_AF))+
geom_boxplot()+
labs(x="Dias despues de emergencia",
y="Area foliar por planta ("~"cm"^2~")" )+
ylim(0, 12000)+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
#
NH_T <- BD1 %>%
group_by(TIPO, DDE) %>%
summarise(S_NH= ( n()/7) ) %>%
ungroup()
## `summarise()` has grouped output by 'TIPO'. You can override using the
## `.groups` argument.
ggplot(NH_T, aes(x=factor(DDE), y=S_NH, fill=TIPO ))+
geom_bar(position="dodge",colour="black", stat = "identity")+
theme_bw()+
labs(x="Días después de emergencia",
y="Promedio del número de hojas por planta" )+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
#
ggplot(NH_T, aes(x=factor(DDE), y=S_NH, fill=TIPO ))+
geom_bar(position="identity",colour="black", stat = "identity")+
theme_bw()+
labs(x="Días después de emergencia",
y="Promedio del número de hojas por planta" )+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.4.3
library(readxl)
#
DolarColor <- "#69b3a2" # color for "dollar" (not used below)
EurColor <- "#CD6155" # color for "euro" (not used below)
#
NH_T2 <- NH_T %>% pivot_wider(., names_from = TIPO, values_from = S_NH)
#
library(dplyr)
#
AF_NH <- AF_T %>%
group_by(., DDE) %>%
summarise(., AF=mean(S_AF)) %>%
inner_join(NH_T, by="DDE")
AF_NH
## # A tibble: 14 × 4
## DDE AF TIPO S_NH
## <dbl> <dbl> <fct> <dbl>
## 1 22 866. Madura 17.4
## 2 22 866. Joven 5
## 3 31 1708. Madura 26.1
## 4 31 1708. Joven 3.14
## 5 43 3410. Madura 46.6
## 6 43 3410. Joven 21.1
## 7 52 3099. Madura 46.3
## 8 52 3099. Joven 12.6
## 9 62 5105. Madura 82.4
## 10 62 5105. Joven 8.43
## 11 73 7774. Madura 127.
## 12 73 7774. Joven 9.29
## 13 82 5568. Madura 104.
## 14 82 5568. Joven 9.86
#
coeff <- 0.02 # scaling coefficient so the units of both y axes line up;
# it maps the secondary variable (number of leaves) onto the scale of the primary axis
ggplot(AF_NH, aes(x=DDE, fill=TIPO))+
geom_bar(aes(y=S_NH/coeff, fill=TIPO), stat = "identity", color="black")+
geom_line(aes(y=AF), size=0.8, color="black")+
scale_y_continuous(name="Área foliar promedio por planta ("~"cm"^2~")",
limits = c(0, 8000),
sec.axis = sec_axis(~.*coeff, name = "Número de hojas"))+
scale_x_continuous(name="Días despues de emergencia",
breaks=seq(0, 90, 10))+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
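# The value 0.02 above was chosen by hand; a data-driven alternative (illustrative only) is to
# take the ratio of the two series' maxima so both span a similar range on the shared axis.
coeff_auto <- max(AF_NH$S_NH) / max(AF_NH$AF)
coeff_auto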
### Correlation analysis
library(corrplot)
## corrplot 0.94 loaded
# Todas
cor1s <- cor(BD1[, 6:11], method = "spearman", use = "pairwise.complete.obs")
corrplot(cor1s, method="number")
# Options for the 'use' argument of cor(): "everything", "all.obs", "complete.obs", "na.or.complete", "pairwise.complete.obs"
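# Optional variation (illustrative): the same matrix shown as a lower triangle with variables
# re-ordered by hierarchical clustering.
corrplot(cor1s, method="number", type="lower", order="hclust")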
# Hojas jovenes
BD_hj <- BD1 %>% filter(., TIPO=="Joven") %>% select(., !c(A_2F) )
BD_hj
## # A tibble: 486 × 11
## EST DDE M PL NH AF L_H A_1F L_FT A_FT TIPO
## <fct> <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 Vanessa 22 1 1 1 12.7 6.96 1.63 4.22 3.4 Joven
## 2 Vanessa 22 1 1 5 79.0 3.04 1.49 2.32 2.17 Joven
## 3 Vanessa 22 1 1 13 13.1 6.18 3.73 3.51 3.45 Joven
## 4 Vanessa 22 1 1 14 9.43 6 2.77 3.36 2.57 Joven
## 5 Vanessa 22 1 3 1 25.6 11.9 2.48 5.33 4.60 Joven
## 6 Vanessa 22 1 3 4 27.4 11.3 2.05 7.74 5.61 Joven
## 7 Vanessa 22 1 3 9 17.6 9.66 1.76 5.36 3.90 Joven
## 8 Vanessa 22 1 3 10 19.9 9.63 4.12 5.44 3.76 Joven
## 9 Vanessa 22 1 3 11 10.4 8.04 1.13 4.00 2.70 Joven
## 10 Vanessa 22 1 4 8 32.7 15.5 2.13 7.01 5.95 Joven
## # ℹ 476 more rows
cor_hj <- cor(BD_hj[, 6:10], method = "spearman", use="pairwise.complete.obs")
corrplot(cor_hj, method="number")
# Hojas maduras
BD_ma <- BD1 %>% filter(., TIPO=="Madura")
BD_ma
## # A tibble: 3,150 × 12
## EST DDE M PL NH AF L_H A_1F A_2F L_FT A_FT TIPO
## <fct> <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 Vanessa 22 1 1 2 88.4 21.7 10.2 7.62 7.27 4.98 Madura
## 2 Vanessa 22 1 1 3 45.7 12.6 7.00 6.63 6.93 3.80 Madura
## 3 Vanessa 22 1 1 4 74.3 22.1 9.68 7.42 6.42 4.93 Madura
## 4 Vanessa 22 1 1 6 94.9 17.4 8.84 10.2 8.10 5.06 Madura
## 5 Vanessa 22 1 1 7 21.2 6.94 4.44 5.65 2.94 1.95 Madura
## 6 Vanessa 22 1 1 8 44.0 11.8 6.77 5.07 4.88 4.06 Madura
## 7 Vanessa 22 1 1 9 67.6 14.3 9.89 8.53 7.5 4.29 Madura
## 8 Vanessa 22 1 1 10 68.4 14.4 10.2 8.65 7.35 4.23 Madura
## 9 Vanessa 22 1 1 11 97.9 18.9 10.5 11.8 5.83 5.43 Madura
## 10 Vanessa 22 1 1 12 60.0 11.7 9.18 9 5.14 4.07 Madura
## # ℹ 3,140 more rows
cor_ma <- cor(BD_ma[, 6:11], method = "spearman", use = "pairwise.complete.obs")
corrplot(cor_ma, method="number")
# Division de BD
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Cargando paquete requerido: lattice
##
## Adjuntando el paquete: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(dplyr)
set.seed(123) # Para reproducibilidad
# Dividir el conjunto de datos en grupos de entrenamiento y validación
## Hojas jovenes
indices_entrenamiento_hj <- createDataPartition(BD_hj$AF, p = 0.8, list = FALSE)
entren_hj <- BD_hj[indices_entrenamiento_hj, ]
valid_hj <- BD_hj[-indices_entrenamiento_hj, ]
## Hojas maduras
indices_entrenamiento_ma <- createDataPartition(BD_ma$AF, p = 0.8, list = FALSE)
entren_ma <- BD_ma[indices_entrenamiento_ma, ]
valid_ma <- BD_ma[-indices_entrenamiento_ma, ]
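# Quick check (not in the original script) that the 80/20 partition came out as expected.
c(entren_hj = nrow(entren_hj), valid_hj = nrow(valid_hj),
  entren_ma = nrow(entren_ma), valid_ma = nrow(valid_ma))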
library(ggpubr)
## Hojas jovenes
R.L_H.hj <- lm(formula = AF~L_H, data = entren_hj)
summary(R.L_H.hj)
##
## Call:
## lm(formula = AF ~ L_H, data = entren_hj)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.021 -2.980 -0.218 2.340 38.966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.0833 1.0214 -14.77 <2e-16 ***
## L_H 4.1342 0.1011 40.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.624 on 388 degrees of freedom
## Multiple R-squared: 0.8116, Adjusted R-squared: 0.8111
## F-statistic: 1672 on 1 and 388 DF, p-value: < 2.2e-16
## Hojas maduras
R.L_H.ma <- lm(formula = AF~L_H, data = entren_ma)
summary(R.L_H.ma)
##
## Call:
## lm(formula = AF ~ L_H, data = entren_ma)
##
## Residuals:
## Min 1Q Median 3Q Max
## -61.578 -7.242 0.730 7.070 147.197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -47.91433 0.82602 -58.01 <2e-16 ***
## L_H 7.61510 0.05593 136.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.61 on 2520 degrees of freedom
## Multiple R-squared: 0.8803, Adjusted R-squared: 0.8803
## F-statistic: 1.854e+04 on 1 and 2520 DF, p-value: < 2.2e-16
# Young leaves
# 1. Linearity test
cor.test(entren_hj$AF, entren_hj$L_H)
##
## Pearson's product-moment correlation
##
## data: entren_hj$AF and entren_hj$L_H
## t = 40.888, df = 388, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8803632 0.9180757
## sample estimates:
## cor
## 0.9009064
# 2. Heteroscedasticity tests
plot(fitted(R.L_H.hj), resid(R.L_H.hj), xlab='Fitted Values', ylab='Residuals'); abline(0,0)
# The residuals-vs-fitted plot shows a "cone" shape:
# the residuals do not have constant variance across the range of fitted values.
plot(R.L_H.hj, 3) # check that the red line is roughly horizontal
# perform Breusch-Pagan test
library(lmtest)
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(R.L_H.hj)
##
## studentized Breusch-Pagan test
##
## data: R.L_H.hj
## BP = 33.2, df = 1, p-value = 8.317e-09
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Cargando paquete requerido: carData
##
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:dplyr':
##
## recode
ncvTest(R.L_H.hj) # Since the p-value from the test is <0.05 we will reject the null hypothesis
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 205.5325, Df = 1, p = < 2.22e-16
# and conclude that heteroscedasticity is a problem in this model
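# Alternative to re-fitting (illustrative sketch; assumes the 'sandwich' package is installed):
# keep the OLS fit but report heteroscedasticity-consistent (HC3) standard errors.
library(sandwich)
coeftest(R.L_H.hj, vcov = vcovHC(R.L_H.hj, type = "HC3"))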
# 3. Normality of residuals
plot(R.L_H.hj, 2) # the Q-Q plot looks roughly normal
hist(R.L_H.hj$residuals) # the histogram also looks approximately normal
shapiro.test(R.L_H.hj$residuals) # but the Shapiro-Wilk test rejects normality
##
## Shapiro-Wilk normality test
##
## data: R.L_H.hj$residuals
## W = 0.86611, p-value < 2.2e-16
#LillieTest(R.L_H.hj$residuals) # normal at the 1% level
# 4. Multicollinearity check (ONLY IF THE MODEL HAS MORE THAN ONE PREDICTOR)
#library(DescTools)
#VIF(R.L_H.hj) # look for values below 4 or 5
# 5. Influential observations
plot(R.L_H.hj, 4) # which cases have the largest Cook's distance
plot(R.L_H.hj, 5) # no value exceeds 1.0, so no influential points are suspected
entren_hj$cook.L_H <- cooks.distance(R.L_H.hj)
which(entren_hj$cook.L_H>1) # no influential observations flagged by Cook's distance
## named integer(0)
# Mature leaves
# 1. Linearity test
cor.test(entren_ma$AF, entren_ma$L_H)
##
## Pearson's product-moment correlation
##
## data: entren_ma$AF and entren_ma$L_H
## t = 136.15, df = 2520, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9334086 0.9427632
## sample estimates:
## cor
## 0.9382572
# 2. Heteroscedasticity tests
plot(fitted(R.L_H.ma), resid(R.L_H.ma), xlab='Fitted Values', ylab='Residuals'); abline(0,0)
plot(R.L_H.ma, 3) # check that the red line is roughly horizontal
library(lmtest); bptest(R.L_H.ma)
##
## studentized Breusch-Pagan test
##
## data: R.L_H.ma
## BP = 422.11, df = 1, p-value < 2.2e-16
library(car); ncvTest(R.L_H.ma)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 2385.598, Df = 1, p = < 2.22e-16
# 3. Normality of residuals
plot(R.L_H.ma, 2) # the Q-Q plot looks roughly normal
hist(R.L_H.ma$residuals) # the histogram also looks approximately normal
shapiro.test(R.L_H.ma$residuals) # but the Shapiro-Wilk test rejects normality
##
## Shapiro-Wilk normality test
##
## data: R.L_H.ma$residuals
## W = 0.90322, p-value < 2.2e-16
#LillieTest(R.L_H.ma$residuals) # normal at the 1% level
# 4. Multicollinearity check (ONLY IF THE MODEL HAS MORE THAN ONE PREDICTOR)
#library(DescTools)
#VIF(R.L_H.ma) # look for values below 4 or 5
# 5. Influential observations
plot(R.L_H.ma, 4) # which cases have the largest Cook's distance
plot(R.L_H.ma, 5) # no value exceeds 1.0, so no influential points are suspected
entren_ma$cook.L_H <- cooks.distance(R.L_H.ma)
which(entren_ma$cook.L_H>1) # no influential observations flagged by Cook's distance
## named integer(0)
# Hojas jovenes
w.L_H.hj <- 1 / lm(abs(R.L_H.hj$residuals) ~ R.L_H.hj$fitted.values)$fitted.values^2
R.MCP.L_H.hj <- lm(AF ~ L_H, data = entren_hj, weights=w.L_H.hj)
summary(R.MCP.L_H.hj)
##
## Call:
## lm(formula = AF ~ L_H, data = entren_hj, weights = w.L_H.hj)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -4.853 -1.065 -0.181 0.725 48.151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.9439 0.6334 -7.805 5.59e-14 ***
## L_H 3.0027 0.1142 26.289 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.903 on 388 degrees of freedom
## Multiple R-squared: 0.6405, Adjusted R-squared: 0.6395
## F-statistic: 691.1 on 1 and 388 DF, p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.L_H.hj)
##
## Adjuntando el paquete: 'DescTools'
## The following object is masked from 'package:car':
##
## Recode
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
plot(R.MCP.L_H.hj, 3)
library(lmtest); bptest(R.MCP.L_H.hj)
##
## studentized Breusch-Pagan test
##
## data: R.MCP.L_H.hj
## BP = 0.7767, df = 1, p-value = 0.3782
library(car); ncvTest(R.MCP.L_H.hj)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 349.75, Df = 1, p = < 2.22e-16
hist(R.MCP.L_H.hj$residuals)
plot(R.MCP.L_H.hj, 2)
shapiro.test(R.MCP.L_H.hj$residuals)
##
## Shapiro-Wilk normality test
##
## data: R.MCP.L_H.hj$residuals
## W = 0.78863, p-value < 2.2e-16
#LillieTest(R.MCP.L_H.hj$residuals)
plot(R.MCP.L_H.hj, 4)
# Hojas maduras
w.L_H.ma <- 1 / lm(abs(R.L_H.ma$residuals) ~ R.L_H.ma$fitted.values)$fitted.values^2
R.MCP.L_H.ma <- lm(AF ~ L_H, data = entren_ma, weights=w.L_H.ma)
summary(R.MCP.L_H.ma)
##
## Call:
## lm(formula = AF ~ L_H, data = entren_ma, weights = w.L_H.ma)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -3.6038 -0.7432 -0.0932 0.6110 18.7334
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -25.08530 0.42918 -58.45 <2e-16 ***
## L_H 5.76220 0.04503 127.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.202 on 2520 degrees of freedom
## Multiple R-squared: 0.8666, Adjusted R-squared: 0.8666
## F-statistic: 1.637e+04 on 1 and 2520 DF, p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.L_H.ma)
plot(R.MCP.L_H.ma, 3)
library(lmtest); bptest(R.MCP.L_H.ma)
##
## studentized Breusch-Pagan test
##
## data: R.MCP.L_H.ma
## BP = 0.55299, df = 1, p-value = 0.4571
library(car); ncvTest(R.MCP.L_H.ma)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 166.9895, Df = 1, p = < 2.22e-16
hist(R.MCP.L_H.ma$residuals)
plot(R.MCP.L_H.ma, 2)
shapiro.test(R.MCP.L_H.ma$residuals)
##
## Shapiro-Wilk normality test
##
## data: R.MCP.L_H.ma$residuals
## W = 0.75092, p-value < 2.2e-16
#LillieTest(R.MCP.L_H.ma$residuals)
plot(R.MCP.L_H.ma, 4)
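# The weighting rule used above (inverse squared fitted values from a regression of |residuals|
# on the fitted values) can be wrapped in a small helper for reuse; 'wls_weights' is an
# illustrative name, not part of the original script.
wls_weights <- function(modelo) {
  1 / lm(abs(residuals(modelo)) ~ fitted(modelo))$fitted.values^2
}
# e.g. lm(AF ~ L_H, data = entren_ma, weights = wls_weights(R.L_H.ma))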
### Predicciones sobre el modelo
# Hojas jovenes
predic_hj <- predict(R.L_H.hj, newdata = valid_hj)
# Hojas maduras
predic_ma <- predict(R.L_H.ma, newdata = valid_ma)
# Hojas jovenes
rmse_hj <- sqrt(mean((valid_hj$AF - predic_hj)^2)) # Raíz del error cuadrático medio
rmse_hj
## [1] 9.525282
mae_hj <- mean(abs(valid_hj$AF - predic_hj)) # Error absoluto medio
mae_hj
## [1] 4.076445
r2_hj <- cor(valid_hj$AF, predic_hj)^2 # Coeficiente de determinación (R cuadrado)
r2_hj
## [1] 0.513131
# Hojas maduras
rmse_ma <- sqrt(mean((valid_ma$AF - predic_ma)^2)) # Raíz del error cuadrático medio
rmse_ma
## [1] 15.02574
mae_ma <- mean(abs(valid_ma$AF - predic_ma)) # Error absoluto medio
mae_ma
## [1] 9.970014
r2_ma <- cor(valid_ma$AF, predic_ma)^2 # Coeficiente de determinación (R cuadrado)
r2_ma
## [1] 0.8799063
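# The same three metrics in one call via caret (already loaded for createDataPartition); its
# default R-squared is also the squared correlation, so the values should match the ones above.
postResample(pred = predic_ma, obs = valid_ma$AF)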
library(ggpubr)
## Hojas jovenes
R.A_1F.hj <- lm(formula = AF~A_1F, data = entren_hj)
summary(R.A_1F.hj)
##
## Call:
## lm(formula = AF ~ A_1F, data = entren_hj)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.136 -7.152 -2.198 4.676 64.603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.1620 1.2511 7.323 1.41e-12 ***
## A_1F 3.5094 0.2502 14.024 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.56 on 388 degrees of freedom
## Multiple R-squared: 0.3364, Adjusted R-squared: 0.3347
## F-statistic: 196.7 on 1 and 388 DF, p-value: < 2.2e-16
summary(R.A_1F.hj)$sigma
## [1] 10.55638
## Hojas maduras
R.A_1F.ma <- lm(formula = AF~A_1F, data = entren_ma)
summary(R.A_1F.ma)
##
## Call:
## lm(formula = AF ~ A_1F, data = entren_ma)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.103 -11.724 -4.026 6.784 142.733
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.6878 1.1547 -36.97 <2e-16 ***
## A_1F 12.0692 0.1306 92.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.16 on 2520 degrees of freedom
## Multiple R-squared: 0.7721, Adjusted R-squared: 0.772
## F-statistic: 8537 on 1 and 2520 DF, p-value: < 2.2e-16
summary(R.A_1F.ma)$sigma
## [1] 20.16261
# Young leaves
# 1. Linearity test
cor.test(entren_hj$AF, entren_hj$A_1F)
##
## Pearson's product-moment correlation
##
## data: entren_hj$AF and entren_hj$A_1F
## t = 14.024, df = 388, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5100440 0.6422831
## sample estimates:
## cor
## 0.5799715
# 2. Heteroscedasticity tests
plot(fitted(R.A_1F.hj), resid(R.A_1F.hj), xlab='Fitted Values', ylab='Residuals'); abline(0,0)
# The residuals-vs-fitted plot shows a "cone" shape:
# the residuals do not have constant variance across the range of fitted values.
plot(R.A_1F.hj, 3) # check that the red line is roughly horizontal
# perform Breusch-Pagan test
library(lmtest)
bptest(R.A_1F.hj)
##
## studentized Breusch-Pagan test
##
## data: R.A_1F.hj
## BP = 20.429, df = 1, p-value = 6.189e-06
library(car)
ncvTest(R.A_1F.hj) # Since the p-value from the test is <0.05 we will reject the null hypothesis
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 70.93415, Df = 1, p = < 2.22e-16
# and conclude that heteroscedasticity is a problem in this model
# 3. Normality of residuals
plot(R.A_1F.hj, 2) # the Q-Q plot looks roughly normal
hist(R.A_1F.hj$residuals) # the histogram also looks approximately normal
shapiro.test(R.A_1F.hj$residuals) # but the Shapiro-Wilk test rejects normality
##
## Shapiro-Wilk normality test
##
## data: R.A_1F.hj$residuals
## W = 0.89298, p-value = 6.932e-16
#LillieTest(R.A_1F.hj$residuals) # normal at the 1% level
# 4. Multicollinearity check (ONLY IF THE MODEL HAS MORE THAN ONE PREDICTOR)
#library(DescTools)
#VIF(R.A_1F.hj) # look for values below 4 or 5
# 5. Influential observations
plot(R.A_1F.hj, 4) # which cases have the largest Cook's distance
plot(R.A_1F.hj, 5) # no value exceeds 1.0, so no influential points are suspected
entren_hj$cook.A_1F <- cooks.distance(R.A_1F.hj)
which(entren_hj$cook.A_1F>1) # no influential observations flagged by Cook's distance
## named integer(0)
# Mature leaves
# 1. Linearity test
cor.test(entren_ma$AF, entren_ma$A_1F)
##
## Pearson's product-moment correlation
##
## data: entren_ma$AF and entren_ma$A_1F
## t = 92.398, df = 2520, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8694810 0.8872924
## sample estimates:
## cor
## 0.8786921
# 2. Heteroscedasticity tests
plot(fitted(R.A_1F.ma), resid(R.A_1F.ma), xlab='Fitted Values', ylab='Residuals'); abline(0,0)
plot(R.A_1F.ma, 3) # check that the red line is roughly horizontal
library(lmtest); bptest(R.A_1F.ma)
##
## studentized Breusch-Pagan test
##
## data: R.A_1F.ma
## BP = 229.37, df = 1, p-value < 2.2e-16
library(car); ncvTest(R.A_1F.ma)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 993.4437, Df = 1, p = < 2.22e-16
# 3. Normality of residuals
plot(R.A_1F.ma, 2) # the Q-Q plot looks roughly normal
hist(R.A_1F.ma$residuals) # the histogram also looks approximately normal
shapiro.test(R.A_1F.ma$residuals) # but the Shapiro-Wilk test rejects normality
##
## Shapiro-Wilk normality test
##
## data: R.A_1F.ma$residuals
## W = 0.86529, p-value < 2.2e-16
#LillieTest(R.A_1F.ma$residuals) # normal at the 1% level
# 4. Multicollinearity check (ONLY IF THE MODEL HAS MORE THAN ONE PREDICTOR)
#library(DescTools)
#VIF(R.A_1F.ma) # look for values below 4 or 5
# 5. Influential observations
plot(R.A_1F.ma, 4) # which cases have the largest Cook's distance
plot(R.A_1F.ma, 5) # no value exceeds 1.0, so no influential points are suspected
entren_ma$cook.A_1F <- cooks.distance(R.A_1F.ma)
which(entren_ma$cook.A_1F>1) # no influential observations flagged by Cook's distance
## named integer(0)
# Hojas jovenes
w.A_1F.hj <- 1 / lm(abs(R.A_1F.hj$residuals) ~ R.A_1F.hj$fitted.values)$fitted.values^2
R.MCP.A_1F.hj <- lm(AF ~ A_1F, data = entren_hj, weights=w.A_1F.hj)
summary(R.MCP.A_1F.hj)
##
## Call:
## lm(formula = AF ~ A_1F, data = entren_hj, weights = w.A_1F.hj)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -2.0862 -0.9530 -0.3101 0.6311 6.9013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.2297 1.1488 10.65 <2e-16 ***
## A_1F 2.7924 0.2789 10.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.352 on 388 degrees of freedom
## Multiple R-squared: 0.2053, Adjusted R-squared: 0.2033
## F-statistic: 100.2 on 1 and 388 DF, p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.A_1F.hj)
plot(R.MCP.A_1F.hj, 3)
library(lmtest); bptest(R.MCP.A_1F.hj)
##
## studentized Breusch-Pagan test
##
## data: R.MCP.A_1F.hj
## BP = 0.65768, df = 1, p-value = 0.4174
library(car); ncvTest(R.MCP.A_1F.hj)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.7695445, Df = 1, p = 0.38036
hist(R.MCP.A_1F.hj$residuals)
plot(R.MCP.A_1F.hj, 2)
shapiro.test(R.MCP.A_1F.hj$residuals)
##
## Shapiro-Wilk normality test
##
## data: R.MCP.A_1F.hj$residuals
## W = 0.87434, p-value < 2.2e-16
#LillieTest(R.MCP.A_1F.hj$residuals)
plot(R.MCP.A_1F.hj, 4)
# Hojas maduras
w.A_1F.ma <- 1 / lm(abs(R.A_1F.ma$residuals) ~ R.A_1F.ma$fitted.values)$fitted.values^2
R.MCP.A_1F.ma <- lm(AF ~ A_1F, data = entren_ma, weights=w.A_1F.ma)
summary(R.MCP.A_1F.ma)
##
## Call:
## lm(formula = AF ~ A_1F, data = entren_ma, weights = w.A_1F.ma)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -2.1383 -0.9258 -0.3760 0.4752 18.7645
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.6168 0.7514 -16.79 <2e-16 ***
## A_1F 8.1310 0.1165 69.80 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.424 on 2520 degrees of freedom
## Multiple R-squared: 0.6591, Adjusted R-squared: 0.659
## F-statistic: 4872 on 1 and 2520 DF, p-value: < 2.2e-16
library(DescTools); #VIF(R.MCP.A_1F.ma)
plot(R.MCP.A_1F.ma, 3)
library(lmtest); bptest(R.MCP.A_1F.ma)
##
## studentized Breusch-Pagan test
##
## data: R.MCP.A_1F.ma
## BP = 0.5787, df = 1, p-value = 0.4468
library(car); ncvTest(R.MCP.A_1F.ma)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 13.1759, Df = 1, p = 0.00028357
hist(R.MCP.A_1F.ma$residuals)
plot(R.MCP.A_1F.ma, 2)
shapiro.test(R.MCP.A_1F.ma$residuals)
##
## Shapiro-Wilk normality test
##
## data: R.MCP.A_1F.ma$residuals
## W = 0.72561, p-value < 2.2e-16
#LillieTest(R.MCP.A_1F.ma$residuals)
plot(R.MCP.A_1F.ma, 4)
### Predicciones sobre el modelo
# Hojas jovenes
predic_hj <- predict(R.A_1F.hj, newdata = valid_hj)
# Hojas maduras
predic_ma <- predict(R.A_1F.ma, newdata = valid_ma)
# Hojas jovenes
rmse_hj <- sqrt(mean((valid_hj$AF - predic_hj)^2)) # Raíz del error cuadrático medio
rmse_hj
## [1] 11.77434
mae_hj <- mean(abs(valid_hj$AF - predic_hj)) # Error absoluto medio
mae_hj
## [1] 8.307776
r2_hj <- cor(valid_hj$AF, predic_hj)^2 # Coeficiente de determinación (R cuadrado)
r2_hj
## [1] 0.2367429
# Hojas maduras
rmse_ma <- sqrt(mean((valid_ma$AF - predic_ma)^2)) # Raíz del error cuadrático medio
rmse_ma
## [1] 22.00266
mae_ma <- mean(abs(valid_ma$AF - predic_ma)) # Error absoluto medio
mae_ma
## [1] 14.35855
r2_ma <- cor(valid_ma$AF, predic_ma)^2 # Coeficiente de determinación (R cuadrado)
r2_ma
## [1] 0.7428278
## Hojas jovenes
R.LH.A_1F.hj <- lm(formula = AF ~ L_H + A_1F, data = entren_hj)
summary(R.LH.A_1F.hj)
##
## Call:
## lm(formula = AF ~ L_H + A_1F, data = entren_hj)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.729 -2.439 -0.100 2.128 37.321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -16.1619 0.9568 -16.892 < 2e-16 ***
## L_H 3.7178 0.1072 34.676 < 2e-16 ***
## A_1F 1.1323 0.1414 8.009 1.37e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.216 on 387 degrees of freedom
## Multiple R-squared: 0.8384, Adjusted R-squared: 0.8376
## F-statistic: 1004 on 2 and 387 DF, p-value: < 2.2e-16
summary(R.LH.A_1F.hj)$sigma
## [1] 5.215678
## Hojas maduras
R.LH.A_1F.ma <- lm(formula = AF ~ L_H + A_1F, data = entren_ma)
summary(R.LH.A_1F.ma)
##
## Call:
## lm(formula = AF ~ L_H + A_1F, data = entren_ma)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.755 -7.184 -0.363 6.002 135.861
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -53.0325 0.7773 -68.23 <2e-16 ***
## L_H 5.6387 0.0974 57.89 <2e-16 ***
## A_1F 3.9137 0.1648 23.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.21 on 2519 degrees of freedom
## Multiple R-squared: 0.9022, Adjusted R-squared: 0.9021
## F-statistic: 1.162e+04 on 2 and 2519 DF, p-value: < 2.2e-16
summary(R.LH.A_1F.ma)$sigma
## [1] 13.20998
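# With two predictors, the multicollinearity check mentioned earlier becomes relevant;
# car::vif() is one option (values well below 4-5 would be reassuring). Shown as a sketch.
car::vif(R.LH.A_1F.hj)
car::vif(R.LH.A_1F.ma)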
#
R.PP.LH <- lm(formula = AF ~ L_H + TIPO + 0, data = BD1)
summary(R.PP.LH)
##
## Call:
## lm(formula = AF ~ L_H + TIPO + 0, data = BD1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.732 -7.080 0.715 6.603 149.437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## L_H 7.49916 0.04814 155.79 <2e-16 ***
## TIPOMadura -46.22588 0.71205 -64.92 <2e-16 ***
## TIPOJoven -47.68527 0.80172 -59.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.36 on 3633 degrees of freedom
## Multiple R-squared: 0.9543, Adjusted R-squared: 0.9543
## F-statistic: 2.531e+04 on 3 and 3633 DF, p-value: < 2.2e-16
summary(R.PP.LH)$sigma
## [1] 14.35716
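# The plots below hard-code the estimated intercepts and slope; they can also be pulled
# directly from the fitted object.
coef(R.PP.LH)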
#
plot(y=BD1$AF,
x=BD1$L_H,
pch=19,
col=factor(BD1$TIPO))
abline(-46.22588, 7.49916, col="black", lty=2)
abline(-47.68527, 7.49916, col="red", lty=2)
#
ggplot(BD1, aes(x = L_H, y = AF, color = factor(TIPO))) +
geom_point(size = 2, alpha=0.2) +
geom_abline(intercept = -46.22588, slope = 7.49916, linetype = "solid", color = "#1b9e77") +
geom_abline(intercept = -47.68527, slope = 7.49916, linetype = "solid", color = "#d95f02") +
labs(x = "L_H", y = "AF", color = "TIPO") +
xlab("Longitud de la hoja (cm)")+
ylab("Promedio de area foliar por hoja ("~"cm"^2~")")+
xlim(c(0,32))+
ylim(c(0,200))+
theme_bw()+
theme(
legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_point()`).
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
BD1
## # A tibble: 3,636 × 12
## EST DDE M PL NH AF L_H A_1F A_2F L_FT A_FT TIPO
## <fct> <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 Vanessa 22 1 1 1 12.7 6.96 1.63 NA 4.22 3.4 Joven
## 2 Vanessa 22 1 1 2 88.4 21.7 10.2 7.62 7.27 4.98 Madura
## 3 Vanessa 22 1 1 3 45.7 12.6 7.00 6.63 6.93 3.80 Madura
## 4 Vanessa 22 1 1 4 74.3 22.1 9.68 7.42 6.42 4.93 Madura
## 5 Vanessa 22 1 1 5 79.0 3.04 1.49 NA 2.32 2.17 Joven
## 6 Vanessa 22 1 1 6 94.9 17.4 8.84 10.2 8.10 5.06 Madura
## 7 Vanessa 22 1 1 7 21.2 6.94 4.44 5.65 2.94 1.95 Madura
## 8 Vanessa 22 1 1 8 44.0 11.8 6.77 5.07 4.88 4.06 Madura
## 9 Vanessa 22 1 1 9 67.6 14.3 9.89 8.53 7.5 4.29 Madura
## 10 Vanessa 22 1 1 10 68.4 14.4 10.2 8.65 7.35 4.23 Madura
## # ℹ 3,626 more rows
A <- ggpairs(BD1, columns=6:11, aes(colour=TIPO, alpha=0.2),
        upper = list(continuous = wrap("cor", size = 2.5)),
        lower = list(continuous = wrap("points", size = 0.1, alpha = 0.1)),
        diag = list(continuous = "densityDiag"),
        xlab = NULL,
        axisLabels = "show")
A + theme(
panel.background = element_blank(),
strip.background = element_blank(),
panel.border = element_rect(color = "black", fill = NA) # Mantener borde negro
)
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 486 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 486 rows containing missing values
## Warning: Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 486 rows containing missing values or values outside the scale range
## (`geom_point()`).