Objetivo 2: Prueba normalidad

Author

Luis La Cruz & German Chacón

Published

September 2, 2024

library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(egg)
Cargando paquete requerido: gridExtra

Adjuntando el paquete: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(tidyverse)
library(ggplot2)
library(ggpmisc)
Cargando paquete requerido: ggpp
Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

Adjuntando el paquete: 'ggpp'

The following object is masked from 'package:ggplot2':

    annotate
library(broom)
library(ggplot2)
library(patchwork)
library(egg)
library(ggpubr)

Adjuntando el paquete: 'ggpubr'

The following objects are masked from 'package:ggpp':

    as_npc, as_npcx, as_npcy

The following object is masked from 'package:egg':

    ggarrange
library(readxl)
library(tidyverse)
library(egg)
library(tidyverse)
dat_clean=read_csv("dat_clean_modified_zscore_anchoveta.csv")


dat_clean$group <- factor(dat_clean$group,      # Reordering group factor levels
                         levels = c("3.5", "4", "5", "7.5","10.5","11","12","12.5","13.5"),labels = c("3.5", "4", "5", "7.5","10.5","11","12","12.5","13.5"))

dat_clean$Banda <- factor(dat_clean$Banda,
  levels = c("35-45","45-90","90-170","170-260"),labels = c("35-45","45-90","90-170","170-260"))

Prueba de normalidad

Sv (dB)

library(dplyr)
library(ggplot2)
library(rstatix)  # Para las funciones de análisis estadístico

Adjuntando el paquete: 'rstatix'
The following object is masked from 'package:stats':

    filter
# Realizar el test de Kolmogorov-Smirnov por grupo
ks_results <- dat_clean %>%
  group_by(group) %>%
  summarise(ks_test = list(ks.test(Value, "pnorm", mean(Value), sd(Value))))
Warning: There were 9 warnings in `summarise()`.
The first warning was:
ℹ In argument: `ks_test = list(ks.test(Value, "pnorm", mean(Value),
  sd(Value)))`.
ℹ In group 1: `group = 3.5`.
Caused by warning in `ks.test.default()`:
! ties should not be present for the one-sample Kolmogorov-Smirnov test
ℹ Run `dplyr::last_dplyr_warnings()` to see the 8 remaining warnings.
# Extraer los valores p y el estadístico D de ks_results
p_values <- sapply(ks_results$ks_test, function(x) x$p.value)
D_statistic <- sapply(ks_results$ks_test, function(x) x$statistic)

# Imprimir los valores p y el estadístico D
print(p_values)
[1] 1.096674e-11 9.126406e-25 2.085729e-22 8.337250e-09 4.223537e-10
[6] 5.502346e-12 3.519504e-23 7.120773e-18 3.849554e-06
print(D_statistic)
         D          D          D          D          D          D          D 
0.05458658 0.05331154 0.02262344 0.03691462 0.03199572 0.03276593 0.04761686 
         D          D 
0.06235837 0.03264949 
# Función para crear gráfico Q-Q con etiqueta de grupo y estadísticas KS
qq_plot <- function(data, group_var, value_var, p_val, D_stat) {
  group_label <- unique(data[[group_var]])
  qq <- ggplot(data = dat_clean, aes(sample = !!sym(value_var))) +
    geom_qq() +
    geom_qq_line() +
    labs(title = paste("Q-Q Plot -", group_label, 
                       "\nK-S p-value:", formatC(p_val, format = "e", digits = 2), 
                       "\nK-S D statistic:", formatC(D_stat, digits = 2))) +  
    theme_presentation(base_size = 9)
  return(qq)
}

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$group)), function(i) {
  qq_plot(
    dat_clean %>% filter(group == unique(dat_clean$group)[i]),
    "group",
    "Value",
    p_values[i],
    D_statistic[i]
  )
})
#install.packages("patchwork")

library(patchwork)

# Lista de gráficos Q-Q por grupo
qq_plots2 <- lapply(seq_along(unique(dat_clean$group)), function(i) {
  qq_plot(
    dat_clean %>% filter(group == unique(dat_clean$group)[i]),
    "group",
    "Value",
    p_values[i],
    D_statistic[i]
  )
})

library(patchwork)

# Combinar los gráficos Q-Q en una única figura
combined_plot <- wrap_plots(qq_plots2, nrow = 2)

combined_plot

# Imprimir la figura combinada
print(combined_plot)

ggsave(filename = "Prueba normalidad_objetivo2.png",
  plot = combined_plot,     
  height = 6,             # Specifies the height of the plot in inches
       width = 12,              # Specifies the width of the plot in inches
       dpi = 1000,             # Specifies the resolution in dots per inch
       path = "F:/Tesis abordo/Tesis abordo/Figuras/Objetivo02/",device = "png") 
# Guardar la figura combinada utilizando ggsave

Identificar el tipo de distribución

Sv lineal

library(gamlss)
Cargando paquete requerido: splines
Cargando paquete requerido: gamlss.data

Adjuntando el paquete: 'gamlss.data'
The following object is masked from 'package:datasets':

    sleep
Cargando paquete requerido: gamlss.dist
Cargando paquete requerido: nlme

Adjuntando el paquete: 'nlme'
The following object is masked from 'package:dplyr':

    collapse
Cargando paquete requerido: parallel
Registered S3 method overwritten by 'gamlss':
  method   from
  print.ri bit 
 **********   GAMLSS Version 5.4-22  ********** 
For more on GAMLSS look at https://www.gamlss.com/
Type gamlssNews() to see new features/changes/bug fixes.
library(gamlss.dist)
library(dplyr)


# Cargar la librería gamlss si no está cargada
# install.packages("gamlss")
library(gamlss)

# Eliminar filas con NA en 'response_var', 'Value', o 'group'
dat_clean <- na.omit(dat_clean[c("Value_linear", "group")])

# Definir una lista de distribuciones para probar
distributions <- c("NO","GA")

#distributions es un vector que contiene las distribuciones que deseas probar (por ejemplo, "NO" para Normal, "GA" para Gamma, "IGA" para Inverse Gaussian, "BCPE" para Box-Cox-Power-Exponential, "GIG" para Generalized Inverse Gaussian, entre otras opciones disponibles en gamlss).



# Crear un vector para almacenar los modelos ajustados
models <- vector("list", length(distributions))

# Iterar sobre las distribuciones y ajustar los modelos
for (i in seq_along(distributions)) {
  distribution <- distributions[i]
  model <- gamlss(Value_linear ~ group, data = dat_clean, family = distribution)
  models[[i]] <- model
}
GAMLSS-RS iteration 1: Global Deviance = -1911510 
GAMLSS-RS iteration 2: Global Deviance = -1911510 
GAMLSS-RS iteration 1: Global Deviance = -2622850 
GAMLSS-RS iteration 2: Global Deviance = -2622850 
# Mostrar un resumen de cada modelo y comparar
for (i in seq_along(models)) {
  cat("Distribution:", distributions[i], "\n")
  summary(models[[i]])
  cat("\n")
}
Distribution: NO 
Warning in summary.gamlss(models[[i]]): summary: vcov has failed, option qr is used instead
******************************************************************
Family:  c("NO", "Normal") 

Call:  gamlss(formula = Value_linear ~ group, family = distribution,  
    data = dat_clean) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  identity
Mu Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.967e-06  1.033e-06   2.873  0.00407 ** 
group4      -7.058e-07  1.240e-06  -0.569  0.56916    
group5      -7.650e-08  1.077e-06  -0.071  0.94339    
group7.5     1.585e-06  1.312e-06   1.208  0.22706    
group10.5    7.685e-07  1.222e-06   0.629  0.52937    
group11      5.392e-06  1.200e-06   4.492 7.07e-06 ***
group12      1.832e-05  1.212e-06  15.116  < 2e-16 ***
group12.5    4.864e-05  1.402e-06  34.701  < 2e-16 ***
group13.5    1.356e-04  1.348e-06 100.551  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Sigma link function:  log
Sigma Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -9.594212   0.002068   -4639   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
No. of observations in the fit:  116908 
Degrees of Freedom for the fit:  10
      Residual Deg. of Freedom:  116898 
                      at cycle:  2 
 
Global Deviance:     -1911510 
            AIC:     -1911490 
            SBC:     -1911393 
******************************************************************

Distribution: GA 
******************************************************************
Family:  c("GA", "Gamma") 

Call:  gamlss(formula = Value_linear ~ group, family = distribution,  
    data = dat_clean) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  log
Mu Coefficients:
             Estimate Std. Error  t value Pr(>|t|)    
(Intercept) -12.72808    0.02015 -631.816   <2e-16 ***
group4       -0.27171    0.02418  -11.235   <2e-16 ***
group5       -0.02612    0.02101   -1.243    0.214    
group7.5      0.42809    0.02560   16.724   <2e-16 ***
group10.5     0.23036    0.02384    9.665   <2e-16 ***
group11       1.03583    0.02342   44.238   <2e-16 ***
group12       1.97048    0.02364   83.368   <2e-16 ***
group12.5     2.85625    0.02734  104.460   <2e-16 ***
group13.5     3.84383    0.02630  146.134   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Sigma link function:  log
Sigma Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.284292   0.001727   164.6   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
No. of observations in the fit:  116908 
Degrees of Freedom for the fit:  10
      Residual Deg. of Freedom:  116898 
                      at cycle:  2 
 
Global Deviance:     -2622850 
            AIC:     -2622830 
            SBC:     -2622733 
******************************************************************