Objetivo 1: Prueba normalidad

Author

Luis La Cruz & German Chacón

Published

September 2, 2024

library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(egg)
Cargando paquete requerido: gridExtra

Adjuntando el paquete: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(tidyverse)
library(ggplot2)
library(ggpmisc)
Cargando paquete requerido: ggpp
Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

Adjuntando el paquete: 'ggpp'

The following object is masked from 'package:ggplot2':

    annotate
library(broom)
library(ggplot2)
library(patchwork)
library(egg)
library(ggpubr)

Adjuntando el paquete: 'ggpubr'

The following objects are masked from 'package:ggpp':

    as_npc, as_npcx, as_npcy

The following object is masked from 'package:egg':

    ggarrange
library(readxl)
library(tidyverse)
library(egg)
library(tidyverse)
dat_clean=read_csv("dat_clean_modified_zscore_especies.csv")
New names:
Rows: 332399 Columns: 9
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(5): Banda, Class, Detect_school, N_Catch_Year, group dbl (4): ...1, Frequency,
Value, Depth_school
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
dat_clean$Banda <- factor(dat_clean$Banda,
  levels = c("35-45","45-90","90-170","170-260"),labels = c("35-45","45-90","90-170","170-260"))

dat_clean=dat_clean %>%
  mutate(Value_linear = 10^(Value/10)) %>%
  filter(!is.na(Value_linear))

unique(dat_clean$Class)
[1] "Plancton"     "Vinciguerria" "Salpas"       "Múnida"       "Anchoveta"   

Prueba de normalidad

Logarítmico

Sv (dB)

library(dplyr)
library(ggplot2)
library(rstatix)  # Para las funciones de análisis estadístico

Adjuntando el paquete: 'rstatix'
The following object is masked from 'package:stats':

    filter
# Realizar el test de Kolmogorov-Smirnov por grupo
ks_results <- dat_clean %>%
  group_by(Class) %>%
  summarise(ks_test = list(ks.test(Value, "pnorm", mean(Value), sd(Value))))
Warning: There were 5 warnings in `summarise()`.
The first warning was:
ℹ In argument: `ks_test = list(ks.test(Value, "pnorm", mean(Value),
  sd(Value)))`.
ℹ In group 1: `Class = "Anchoveta"`.
Caused by warning in `ks.test.default()`:
! ties should not be present for the one-sample Kolmogorov-Smirnov test
ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
# Extraer los valores p y el estadístico D de ks_results
p_values <- sapply(ks_results$ks_test, function(x) x$p.value)
D_statistic <- sapply(ks_results$ks_test, function(x) x$statistic)

# Imprimir los valores p y el estadístico D
print(p_values)
[1] 1.707898e-240  1.493239e-14  6.448324e-32  1.241251e-04 1.459260e-296
print(D_statistic)
         D          D          D          D          D 
0.04862262 0.03951914 0.02774585 0.04255259 0.04685382 
# Función para crear gráfico Q-Q con etiqueta de grupo y estadísticas KS
qq_plot <- function(data, group_var, value_var, p_val, D_stat) {
  group_label <- unique(data[[group_var]])
  qq <- ggplot(data = dat_clean, aes(sample = !!sym(value_var))) +
    geom_qq() +
    geom_qq_line() +
    labs(title = paste("Q-Q Plot -", group_label, 
                       "\nK-S p-value:", formatC(p_val, format = "e", digits = 2), 
                       "\nK-S D statistic:", formatC(D_stat, digits = 2))) +  
    theme_presentation(base_size = 12)
  return(qq)
}

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$Class)), function(i) {
  qq_plot(
    dat_clean %>% filter(group == unique(dat_clean$Class)[i]),
    "Class",
    "Value",
    p_values[i],
    D_statistic[i]
  )
})
#install.packages("patchwork")

library(patchwork)

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$Class)), function(i) {
  qq_plot(
    dat_clean %>% filter(Class == unique(dat_clean$Class)[i]),
    "Class",
    "Value",
    p_values[i],
    D_statistic[i]
  )
})

library(patchwork)

# Combinar los gráficos Q-Q en una única figura
combined_plot <- wrap_plots(qq_plots2, nrow = 2)

# Imprimir la figura combinada
print(combined_plot)

# Guardar la figura combinada utilizando ggsave

ggsave(filename = "Prueba normalidad_objetivo1.png",
  plot = combined_plot,     
  height = 6,             # Specifies the height of the plot in inches
       width = 12,              # Specifies the width of the plot in inches
       dpi = 1000,             # Specifies the resolution in dots per inch
       path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo01/",device = "png") 
# Guardar la figura combinada utilizando ggsave

Identificar el tipo de distribución

Sv lineal

library(gamlss)
Cargando paquete requerido: splines
Cargando paquete requerido: gamlss.data

Adjuntando el paquete: 'gamlss.data'
The following object is masked from 'package:datasets':

    sleep
Cargando paquete requerido: gamlss.dist
Cargando paquete requerido: nlme

Adjuntando el paquete: 'nlme'
The following object is masked from 'package:dplyr':

    collapse
Cargando paquete requerido: parallel
Registered S3 method overwritten by 'gamlss':
  method   from
  print.ri bit 
 **********   GAMLSS Version 5.4-22  ********** 
For more on GAMLSS look at https://www.gamlss.com/
Type gamlssNews() to see new features/changes/bug fixes.
library(gamlss.dist)
library(dplyr)


# Cargar la librería gamlss si no está cargada
# install.packages("gamlss")
library(gamlss)

# Eliminar filas con NA en 'response_var', 'Value', o 'group'
dat_clean <- na.omit(dat_clean[c("Value_linear", "Class")])

# Definir una lista de distribuciones para probar
distributions <- c("NO","GA")

#distributions es un vector que contiene las distribuciones que deseas probar (por ejemplo, "NO" para Normal, "GA" para Gamma, "IGA" para Inverse Gaussian, "BCPE" para Box-Cox-Power-Exponential, "GIG" para Generalized Inverse Gaussian, entre otras opciones disponibles en gamlss).



# Crear un vector para almacenar los modelos ajustados
models <- vector("list", length(distributions))

# Iterar sobre las distribuciones y ajustar los modelos
for (i in seq_along(distributions)) {
  distribution <- distributions[i]
  model <- gamlss(Value_linear ~ Class, data = dat_clean, family = distribution)
  models[[i]] <- model
}
GAMLSS-RS iteration 1: Global Deviance = -5717170 
GAMLSS-RS iteration 2: Global Deviance = -5717170 
GAMLSS-RS iteration 1: Global Deviance = -8105031 
GAMLSS-RS iteration 2: Global Deviance = -8105031 
# Mostrar un resumen de cada modelo y comparar
for (i in seq_along(models)) {
  cat("Distribution:", distributions[i], "\n")
  summary(models[[i]])
  cat("\n")
}
Distribution: NO 
Warning in summary.gamlss(models[[i]]): summary: vcov has failed, option qr is used instead
******************************************************************
Family:  c("NO", "Normal") 

Call:  gamlss(formula = Value_linear ~ Class, family = distribution,  
    data = dat_clean) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  identity
Mu Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        1.473e-05  1.303e-07  113.06   <2e-16 ***
ClassMúnida       -1.283e-05  4.556e-07  -28.15   <2e-16 ***
ClassPlancton     -1.420e-05  2.432e-07  -58.38   <2e-16 ***
ClassSalpas       -1.446e-05  8.712e-07  -16.60   <2e-16 ***
ClassVinciguerria -1.327e-05  1.725e-07  -76.89   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Sigma link function:  log
Sigma Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -10.018799   0.001226   -8169   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
No. of observations in the fit:  332399 
Degrees of Freedom for the fit:  6
      Residual Deg. of Freedom:  332393 
                      at cycle:  2 
 
Global Deviance:     -5717170 
            AIC:     -5717158 
            SBC:     -5717094 
******************************************************************

Distribution: GA 
******************************************************************
Family:  c("GA", "Gamma") 

Call:  gamlss(formula = Value_linear ~ Class, family = distribution,  
    data = dat_clean) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  log
Mu Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)       -11.125422   0.004622 -2406.9   <2e-16 ***
ClassMúnida        -2.045475   0.016162  -126.6   <2e-16 ***
ClassPlancton      -3.313090   0.008626  -384.1   <2e-16 ***
ClassSalpas        -4.006442   0.030905  -129.6   <2e-16 ***
ClassVinciguerria  -2.306668   0.006119  -376.9   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Sigma link function:  log
Sigma Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.4576887  0.0009922   461.3   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
No. of observations in the fit:  332399 
Degrees of Freedom for the fit:  6
      Residual Deg. of Freedom:  332393 
                      at cycle:  2 
 
Global Deviance:     -8105031 
            AIC:     -8105019 
            SBC:     -8104954 
******************************************************************