Prueba normalidad

Author

Luis La Cruz & German Chacón

library(readxl)
library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.3.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(egg)
Loading required package: gridExtra

Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(tidyverse)
library(ggplot2)
library(ggpmisc)
Warning: package 'ggpmisc' was built under R version 4.3.3
Loading required package: ggpp
Warning: package 'ggpp' was built under R version 4.3.3
Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

Attaching package: 'ggpp'

The following object is masked from 'package:ggplot2':

    annotate

Registered S3 method overwritten by 'ggpmisc':
  method                  from   
  as.character.polynomial polynom
library(broom)
Warning: package 'broom' was built under R version 4.3.3
library(ggplot2)
library(patchwork)
library(egg)
library(ggpubr)

Attaching package: 'ggpubr'

The following objects are masked from 'package:ggpp':

    as_npc, as_npcx, as_npcy

The following object is masked from 'package:egg':

    ggarrange
library(readxl)
library(tidyverse)
library(egg)
library(tidyverse)
dat_clean=read_csv("dat_clean_modified_zscore_anchoveta.csv")


dat_clean$group <- factor(dat_clean$Subclass_n,      # Reordering group factor levels
                         levels = c("Moda 3.5 cm", "Moda 4 cm", "Moda 5 cm", "Moda 7.5 cm","Moda 10.5 cm", "Moda 11 cm", "Moda 12 cm","Moda 12.5 cm","Moda 13 cm","Moda 13.5 cm"),labels = c("3.5", "4", "5", "7.5","10.5","11","12","12.5","13","13.5"))

dat_clean$Banda <- factor(dat_clean$Banda,
  levels = c("35-45","45-90","90-170","170-260"),labels = c("35-45","45-90","90-170","170-260"))

Prueba de normalidad

Sv (dB)

library(dplyr)
library(ggplot2)
library(rstatix)  # Para las funciones de análisis estadístico

Attaching package: 'rstatix'
The following object is masked from 'package:stats':

    filter
# Realizar el test de Kolmogorov-Smirnov por grupo
ks_results <- dat_clean %>%
  group_by(group) %>%
  summarise(ks_test = list(ks.test(Value, "pnorm", mean(Value), sd(Value))))
Warning: There were 10 warnings in `summarise()`.
The first warning was:
ℹ In argument: `ks_test = list(ks.test(Value, "pnorm", mean(Value),
  sd(Value)))`.
ℹ In group 1: `group = 3.5`.
Caused by warning in `ks.test.default()`:
! ties should not be present for the Kolmogorov-Smirnov test
ℹ Run `dplyr::last_dplyr_warnings()` to see the 9 remaining warnings.
# Extraer los valores p y el estadístico D de ks_results
p_values <- sapply(ks_results$ks_test, function(x) x$p.value)
D_statistic <- sapply(ks_results$ks_test, function(x) x$statistic)

# Imprimir los valores p y el estadístico D
print(p_values)
 [1] 0.000000e+00 0.000000e+00 0.000000e+00 3.175238e-13 0.000000e+00
 [6] 5.502376e-12 0.000000e+00 0.000000e+00 3.133053e-02 3.729361e-05
print(D_statistic)
         D          D          D          D          D          D          D 
0.06127907 0.04418387 0.02060357 0.03477122 0.03581925 0.03276593 0.05382087 
         D          D          D 
0.04980008 0.09117352 0.01948593 
# Función para crear gráfico Q-Q con etiqueta de grupo y estadísticas KS
qq_plot <- function(data, group_var, value_var, p_val, D_stat) {
  group_label <- unique(data[[group_var]])
  qq <- ggplot(data = dat_clean, aes(sample = !!sym(value_var))) +
    geom_qq() +
    geom_qq_line() +
    labs(title = paste("Q-Q Plot -", group_label, 
                       "\nK-S p-value:", formatC(p_val, format = "e", digits = 2), 
                       "\nK-S D statistic:", formatC(D_stat, digits = 2))) +  
    theme_presentation(base_size = 9)
  return(qq)
}

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$group)), function(i) {
  qq_plot(
    dat_clean %>% filter(group == unique(dat_clean$group)[i]),
    "group",
    "Value",
    p_values[i],
    D_statistic[i]
  )
})
#install.packages("patchwork")

library(patchwork)

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$group)), function(i) {
  qq_plot(
    dat_clean %>% filter(group == unique(dat_clean$group)[i]),
    "group",
    "Value",
    p_values[i],
    D_statistic[i]
  )
})

library(patchwork)

# Combinar los gráficos Q-Q en una única figura
combined_plot <- wrap_plots(qq_plots2, nrow = 2)

# Imprimir la figura combinada
print(combined_plot)

# Guardar la figura combinada utilizando ggsave

Prueba de normalidad

Sv lineal

library(dplyr)
library(ggplot2)
library(rstatix)  # Para las funciones de análisis estadístico

# Realizar el test de Kolmogorov-Smirnov por grupo
ks_results <- dat_clean %>%
  group_by(group) %>%
  summarise(ks_test = list(ks.test(Value_linear, "pnorm", mean(Value_linear), sd(Value_linear))))
Warning: There were 10 warnings in `summarise()`.
The first warning was:
ℹ In argument: `ks_test = list(ks.test(Value_linear, "pnorm",
  mean(Value_linear), sd(Value_linear)))`.
ℹ In group 1: `group = 3.5`.
Caused by warning in `ks.test.default()`:
! ties should not be present for the Kolmogorov-Smirnov test
ℹ Run `dplyr::last_dplyr_warnings()` to see the 9 remaining warnings.
# Extraer los valores p y el estadístico D de ks_results
p_values <- sapply(ks_results$ks_test, function(x) x$p.value)
D_statistic <- sapply(ks_results$ks_test, function(x) x$statistic)

# Imprimir los valores p y el estadístico D
print(p_values)
 [1] 0 0 0 0 0 0 0 0 0 0
print(D_statistic)
        D         D         D         D         D         D         D         D 
0.3340008 0.3712167 0.3422565 0.4073929 0.3711752 0.3510336 0.3852706 0.3549797 
        D         D 
0.2943653 0.3854304 
# Función para crear gráfico Q-Q con etiqueta de grupo y estadísticas KS
qq_plot <- function(data, group_var, value_var, p_val, D_stat) {
  group_label <- unique(data[[group_var]])
  qq <- ggplot(data = dat_clean, aes(sample = !!sym(value_var))) +
    geom_qq() +
    geom_qq_line() +
    labs(title = paste("Q-Q Plot -", group_label, 
                       "\nK-S p-value:", formatC(p_val, format = "e", digits = 2), 
                       "\nK-S D statistic:", formatC(D_stat, digits = 2))) +  
    theme_presentation(base_size = 9)
  return(qq)
}

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$group)), function(i) {
  qq_plot(dat_clean %>% filter(group == unique(dat_clean$group)[i]),
    "group",
    "Value_linear",
    p_values[i],
    D_statistic[i]
  )
})
#install.packages("patchwork")

library(patchwork)

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$group)), function(i) {
  qq_plot(
    dat_clean %>% filter(group == unique(dat_clean$group)[i]),
    "group",
    "Value_linear",
    p_values[i],
    D_statistic[i]
  )
})

library(patchwork)

# Combinar los gráficos Q-Q en una única figura
combined_plot <- wrap_plots(qq_plots2, nrow = 2)

# Imprimir la figura combinada
print(combined_plot)

# Guardar la figura combinada utilizando ggsave

Identificar el tipo de distribución

Sv lineal

library(gamlss)
Warning: package 'gamlss' was built under R version 4.3.3
Loading required package: splines
Loading required package: gamlss.data
Warning: package 'gamlss.data' was built under R version 4.3.3

Attaching package: 'gamlss.data'
The following object is masked from 'package:datasets':

    sleep
Loading required package: gamlss.dist
Warning: package 'gamlss.dist' was built under R version 4.3.3
Loading required package: nlme

Attaching package: 'nlme'
The following object is masked from 'package:dplyr':

    collapse
Loading required package: parallel
Registered S3 method overwritten by 'gamlss':
  method   from
  print.ri bit 
 **********   GAMLSS Version 5.4-22  ********** 
For more on GAMLSS look at https://www.gamlss.com/
Type gamlssNews() to see new features/changes/bug fixes.
library(gamlss.dist)
library(dplyr)


# Cargar la librería gamlss si no está cargada
# install.packages("gamlss")
library(gamlss)

# Eliminar filas con NA en 'response_var', 'Value', o 'group'
dat_clean <- na.omit(dat_clean[c("Value_linear", "group")])

# Definir una lista de distribuciones para probar
distributions <- c("NO","GA")

#distributions es un vector que contiene las distribuciones que deseas probar (por ejemplo, "NO" para Normal, "GA" para Gamma, "IGA" para Inverse Gaussian, "BCPE" para Box-Cox-Power-Exponential, "GIG" para Generalized Inverse Gaussian, entre otras opciones disponibles en gamlss).



# Crear un vector para almacenar los modelos ajustados
models <- vector("list", length(distributions))

# Iterar sobre las distribuciones y ajustar los modelos
for (i in seq_along(distributions)) {
  distribution <- distributions[i]
  model <- gamlss(Value_linear ~ group, data = dat_clean, family = distribution)
  models[[i]] <- model
}
GAMLSS-RS iteration 1: Global Deviance = -2311943 
GAMLSS-RS iteration 2: Global Deviance = -2311943 
GAMLSS-RS iteration 1: Global Deviance = -3317462 
GAMLSS-RS iteration 2: Global Deviance = -3317462 
# Mostrar un resumen de cada modelo y comparar
for (i in seq_along(models)) {
  cat("Distribution:", distributions[i], "\n")
  summary(models[[i]])
  cat("\n")
}
Distribution: NO 
Warning in summary.gamlss(models[[i]]): summary: vcov has failed, option qr is used instead
******************************************************************
Family:  c("NO", "Normal") 

Call:  gamlss(formula = Value_linear ~ group, family = distribution,  
    data = dat_clean) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  identity
Mu Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.247e-06  1.146e-06   6.326 2.53e-10 ***
group4      -5.397e-07  1.569e-06  -0.344  0.73084    
group5      -3.862e-06  1.277e-06  -3.025  0.00249 ** 
group7.5     1.397e-05  1.621e-06   8.617  < 2e-16 ***
group10.5   -3.363e-07  1.538e-06  -0.219  0.82691    
group11      1.111e-06  1.614e-06   0.688  0.49129    
group12      1.315e-05  1.625e-06   8.087 6.17e-16 ***
group12.5    2.355e-05  1.717e-06  13.719  < 2e-16 ***
group13      9.889e-06  8.091e-06   1.222  0.22166    
group13.5    1.071e-04  1.559e-06  68.686  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Sigma link function:  log
Sigma Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -8.974109   0.001808   -4964   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
No. of observations in the fit:  153004 
Degrees of Freedom for the fit:  11
      Residual Deg. of Freedom:  152993 
                      at cycle:  2 
 
Global Deviance:     -2311943 
            AIC:     -2311921 
            SBC:     -2311811 
******************************************************************

Distribution: GA 
******************************************************************
Family:  c("GA", "Gamma") 

Call:  gamlss(formula = Value_linear ~ group, family = distribution,  
    data = dat_clean) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  log
Mu Coefficients:
             Estimate Std. Error  t value Pr(>|t|)    
(Intercept) -11.83490    0.01327 -891.830  < 2e-16 ***
group4       -0.07739    0.01817   -4.259 2.06e-05 ***
group5       -0.76117    0.01479  -51.472  < 2e-16 ***
group7.5      1.07426    0.01878   57.204  < 2e-16 ***
group10.5    -0.04752    0.01782   -2.667  0.00765 ** 
group11       0.14264    0.01870    7.628 2.40e-14 ***
group12       1.03455    0.01883   54.946  < 2e-16 ***
group12.5     1.44687    0.01989   72.761  < 2e-16 ***
group13       0.86056    0.09372    9.182  < 2e-16 ***
group13.5     2.75857    0.01806  152.743  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Sigma link function:  log
Sigma Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.383192   0.001482   258.5   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
No. of observations in the fit:  153004 
Degrees of Freedom for the fit:  11
      Residual Deg. of Freedom:  152993 
                      at cycle:  2 
 
Global Deviance:     -3317462 
            AIC:     -3317440 
            SBC:     -3317331 
******************************************************************