1 VARIABLE: LATITUD

# Dependencies: install whatever is missing, then attach with library().
# requireNamespace() only checks that a package is available; unlike require()
# it does not attach the package as a side effect. library() then stops with
# an error (instead of returning FALSE with a warning) if a package still
# cannot be loaded, so a failed installation is not silently ignored.
paquetes <- c("ggplot2", "fitdistrplus", "MASS")
for (paquete in paquetes) {
  if (!requireNamespace(paquete, quietly = TRUE)) {
    install.packages(paquete)
  }
}

library(ggplot2)
library(fitdistrplus)

## Loading required package: MASS

## Loading required package: survival

library(MASS)


# Show the contents of the working directory to confirm the CSV is present.
list.files(path = ".")

## [1] "DERRAMES_GLOBALEST.csv"              
## [2] "project.Rproj"                       
## [3] "Variables continuas inferencial.Rmd" 
## [4] "Variables-continuas-inferencial.html"
## [5] "Variables-continuas-inferencial.Rmd"

# Load the spill dataset: semicolon-separated fields, "." as decimal mark,
# first row holds the column names, and text columns stay as character
# vectors instead of being converted to factors.
datos <- read.csv(file = "DERRAMES_GLOBALEST.csv", sep = ";", dec = ".",
                  header = TRUE, stringsAsFactors = FALSE)

# Inspect the column names and types that were actually read.
str(datos)

## 'data.frame':    3550 obs. of  23 variables:
##  $ Id                              : int  6786 6250 8220 6241 6216 6620 6262 6229 6201 6221 ...
##  $ Dia                             : int  19 3 21 16 19 7 10 12 18 29 ...
##  $ Mes                             : int  1 6 4 3 12 10 2 5 3 1 ...
##  $ Año                             : chr  "A" "1979" "2010" "1978" ...
##  $ Nombre                          : chr  "Arabian Gulf Spills; Persian Gulf, Kuwait" "IXTOC I; Bahia de Campeche, Mexico" "Deepwater Horizon; Gulf of Mexico" "Amoco Cadiz; Brittany, France" ...
##  $ Ubicacion                       : chr  "Persian Gulf, Kuwait" "Bahia de Campeche, Mexico" "Gulf of Mexico" "Brittany, France" ...
##  $ Latitud                         : chr  "29,5" "19,4083" "28,7367" "48,5833" ...
##  $ Longuitud                       : chr  "48" "-92,325" "-88,3872" "-4,71667" ...
##  $ Amenaza                         : chr  "Oil" "Oil" "Oil" "Oil" ...
##  $ Etiquetas                       : chr  "" "Collision" "" "Grounding" ...
##  $ Tipo_de_crudo                   : chr  "Kuwait crude oil" "IXTOC I crude oil" "Diesel, crude oil" "Arabian light crude, Iranian light crude, Bunker C" ...
##  $ Cantidad_recuperada_superficie  : int  NA NA 1 NA NA NA NA NA NA NA ...
##  $ Cantidad_recuperada_costas      : int  NA NA 1 NA NA NA NA NA NA NA ...
##  $ Cantidad_tratada_biologicamente : int  1 NA 1 1 NA NA NA NA NA NA ...
##  $ Cantidad_dispersada_quimicamente: int  NA 1 1 1 NA NA NA 1 1 1 ...
##  $ Cantidad_quemada                : int  NA 1 1 NA NA NA NA 1 1 1 ...
##  $ Maximo_liberacion_galones       : int  336000009 NA 205000000 68000017 NA NA NA 9240000 36100000 NA ...
##  $ Barreras_de_contencion_flotantes: int  35 12 182 17 3 3 7 8 5 6 ...
##  $ Causa_principal                 : chr  "Daño del tanque  " "Incendio y explosion " "Incendio y explosion " "Daño del tanque " ...
##  $ Volumen_derramados_galones      : chr  "336.000.000" "365.000.000" "600.000.000" "68.000.000" ...
##  $ Respuesta_actual_galones        : chr  "336000000" "252000000" "168000000" "68700000" ...
##  $ Fuente_respuesta                : chr  "description and posts" "posts" "description" "posts" ...
##  $ etiqueta_actualizacion          : chr  "RA updated" "RA newly acquired" "RA updated" "RA updated" ...

# Latitud is read as text with a decimal comma (e.g. "19,4083"); replace the
# comma with a point so the values can be parsed as numbers.
latitud_texto <- gsub(",", ".", datos$Latitud, fixed = TRUE)
datos$Latitud <- as.numeric(latitud_texto)

# Work only with the observations that have a latitude.
latitud <- na.omit(datos$Latitud)

# Quartiles, mean and range of the cleaned variable.
summary(latitud)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -78.00   29.25   34.78   36.32   42.48   71.58

# Descriptive histogram
# Plot margins, given in the order bottom, left, top, right.
par(mar = c(4, 4, 2, 1))

hist(latitud, col = "lightgreen", border = "black",
     main = "Histograma de la Latitud (Descriptivo)",
     xlab = "Latitud", ylab = "Frecuencia")

# Box plot, drawn horizontally.
boxplot(latitud, col = "beige", horizontal = TRUE,
        main = "Diagrama de Caja - Latitud")

# Normal curve fit

# Sample mean and standard deviation, used as the parameters of the
# theoretical normal curve.
media_lat <- mean(latitud)
sd_lat <- sd(latitud)

# Histogram on the density scale (freq = FALSE) so the theoretical curve can
# be drawn on the same y axis.
hist(latitud, freq = FALSE, col = "lightblue", border = "gray",
     main = "Ajuste de Distribución Normal a la Latitud",
     xlab = "Latitud")

curve(dnorm(x, mean = media_lat, sd = sd_lat),
      add = TRUE, col = "red", lwd = 2)

# The wide (lwd = 10) legend line stands in for the histogram bars.
legend("topright", bty = "n",
       legend = c("Datos observados", "Curva Normal teórica"),
       col = c("lightblue", "red"), lwd = c(10, 2))

# Normality tests
# Shapiro-Wilk, plus a one-sample Kolmogorov-Smirnov test against a normal
# distribution whose mean and sd are the sample estimates media_lat / sd_lat.
# NOTE(review): the KS reference parameters are estimated from the same data
# and the sample contains ties (see the recorded warning below), so the KS
# p-value should be treated as indicative rather than exact.
shapiro_result <- shapiro.test(latitud)
ks_result <- ks.test(latitud, "pnorm", mean = media_lat, sd = sd_lat)

## Warning in ks.test.default(latitud, "pnorm", mean = media_lat, sd = sd_lat):
## ties should not be present for the one-sample Kolmogorov-Smirnov test

# Report the sample moments that were used as the normal parameters.
cat("Media:", media_lat, "\n")

## Media: 36.31907

cat("Desviación estándar:", sd_lat, "\n\n")

## Desviación estándar: 12.27653

cat("---- PRUEBAS DE NORMALIDAD ----\n")

## ---- PRUEBAS DE NORMALIDAD ----

# Print the full test objects (statistic and p-value) for both tests.
print(shapiro_result)

## 
##  Shapiro-Wilk normality test
## 
## data:  latitud
## W = 0.89291, p-value < 2.2e-16

print(ks_result)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  latitud
## D = 0.12603, p-value < 2.2e-16
## alternative hypothesis: two-sided

# Normal Q-Q plot of the latitudes with its reference line.
qqnorm(latitud, main = "QQ Plot de la variable Latitud")
qqline(latitud, col = "red", lwd = 2)

# Normality is accepted only when both tests fail to reject at the 5% level.
# Each p-value is a single number, so the scalar, short-circuiting `&&` is the
# right operator for an `if` condition (`&` is the elementwise form).
if (shapiro_result$p.value > 0.05 && ks_result$p.value > 0.05) {
  cat("\nConclusión: Los datos de Latitud se ajustan a una distribución Normal (p > 0.05).\n")
} else {
  # The message is passed as two pieces joined with sep = "" so the line break
  # and indentation of the printed text are explicit instead of depending on a
  # string literal that spans two source lines. The printed text is unchanged.
  cat("\nConclusión: Los datos de Latitud no siguen una distribución Normal (p < 0.05). \n",
      "  Se recomienda evaluar otros modelos (Log-normal, Gamma, Exponencial, etc.).\n",
      sep = "")
}

## 
## Conclusión: Los datos de Latitud no siguen una distribución Normal (p < 0.05). 
##   Se recomienda evaluar otros modelos (Log-normal, Gamma, Exponencial, etc.).

# 5. DISTRIBUTION FITTING AND VISUAL COMPARISON ------------

library(fitdistrplus)

# --- Model fitting ---
# The normal model is fitted to the raw latitudes.
ajuste_norm <- fitdist(latitud, "norm")

# Log-normal and exponential need strictly positive data, so the sample is
# shifted until its minimum equals 1 before fitting those two models.
latitud_pos <- latitud - min(latitud) + 1
ajuste_exp <- fitdist(latitud_pos, "exp")
ajuste_lognorm <- fitdist(latitud_pos, "lnorm")

# One row per candidate model with its AIC.
nombres_modelos <- c("Normal", "Log-normal (ajustada)", "Exponencial (ajustada)")
valores_aic <- c(ajuste_norm$aic, ajuste_lognorm$aic, ajuste_exp$aic)
comparacion <- data.frame(Modelo = nombres_modelos, AIC = valores_aic)

cat("\n---- COMPARACIÓN DE MODELOS ----\n")

## 
## ---- COMPARACIÓN DE MODELOS ----

print(comparacion)

##                   Modelo      AIC
## 1                 Normal 27882.06
## 2  Log-normal (ajustada) 30330.04
## 3 Exponencial (ajustada) 40810.69

# Pick the model in the row with the smallest AIC.
fila_minima <- which.min(comparacion$AIC)
mejor_modelo <- comparacion$Modelo[fila_minima]
cat("\n✅ El modelo con mejor ajuste (menor AIC) es:", mejor_modelo, "\n")

## 
## ✅ El modelo con mejor ajuste (menor AIC) es: Normal

# --- Combined plot: the three fitted curves drawn by hand ---

# Base histogram on the density scale (relative frequencies).
hist(latitud, breaks = 20, freq = FALSE,
     col = "lightblue", border = "gray",
     main = "Comparación de distribuciones ajustadas a Latitud",
     xlab = "Latitud")

# Normal curve.
curve(dnorm(x, mean = mean(latitud), sd = sd(latitud)),
      add = TRUE, col = "red", lwd = 2)

# Log-normal curve: x is shifted exactly as the data were shifted before the
# fit, so the density is evaluated on the scale the model was estimated on.
curve(dlnorm(x - min(latitud) + 1,
             sdlog = ajuste_lognorm$estimate["sdlog"],
             meanlog = ajuste_lognorm$estimate["meanlog"]),
      add = TRUE, col = "darkgreen", lwd = 2)

# Exponential curve, evaluated on the same shifted scale.
curve(dexp(x - min(latitud) + 1, rate = ajuste_exp$estimate["rate"]),
      add = TRUE, col = "purple", lwd = 2)

# Legend: one entry per fitted curve.
legend("topright", bty = "n", lwd = 2,
       legend = c("Normal", "Log-normal (ajustada)", "Exponencial (ajustada)"),
       col = c("red", "darkgreen", "purple"))

2 VARIABLE: LONGITUD

# Dependencies: install whatever is missing, then attach with library().
# requireNamespace() only checks availability (no attach side effect), and
# library() fails loudly if a package still cannot be loaded.
paquetes <- c("ggplot2", "fitdistrplus", "MASS")
for (paquete in paquetes) {
  if (!requireNamespace(paquete, quietly = TRUE)) {
    install.packages(paquete)
  }
}

library(ggplot2)
library(fitdistrplus)
library(MASS)


# Read the dataset again (semicolon-separated, "." as decimal mark). This
# replaces `datos`, so every column is back to the type it has in the file.
datos <- read.csv(file = "DERRAMES_GLOBALEST.csv", sep = ";", dec = ".",
                  header = TRUE)

# Longuitud (column name as spelled in the data) uses a decimal comma; swap it
# for a point and parse the result as numeric in a single step.
datos$Longuitud <- as.numeric(gsub(",", ".", datos$Longuitud, fixed = TRUE))

# Work only with the observations that have a longitude.
longitud <- na.omit(datos$Longuitud)

# NOTE(review): the recorded summary below shows a minimum of -197.05 and a
# maximum of 182.60, which fall outside the usual [-180, 180] range for
# longitudes -- confirm whether these records are data-entry errors.
summary(longitud)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -197.05 -122.24  -90.06  -95.52  -77.39  182.60

# Number of observations kept.
length(longitud)

## [1] 3550

# --- Descriptive histogram ---
# Plot margins, given in the order bottom, left, top, right.
par(mar = c(4, 4, 2, 1))
hist(longitud, freq = FALSE, breaks = 20,
     main = "Distribución de Longitud en Derrames Petroleros Globales",
     xlab = "Longitud",
     col = "lightblue", border = "gray")

# Kernel density estimate drawn over the density-scale histogram.
densidad_longitud <- density(longitud)
lines(densidad_longitud, col = "darkred", lwd = 2)

# --- Box plot ---
boxplot(longitud, col = "beige", horizontal = TRUE,
        main = "Diagrama de Caja - Longitud")

#  NORMAL CURVE FIT

# Sample mean and standard deviation, used as the parameters of the
# theoretical normal curve.
media_long <- mean(longitud)
sd_long <- sd(longitud)

# Histogram on the density scale so the theoretical curve shares its y axis.
hist(longitud, freq = FALSE, col = "lightblue", border = "gray",
     main = "Ajuste de Distribución Normal a la Longitud",
     xlab = "Longitud")

curve(dnorm(x, mean = media_long, sd = sd_long),
      add = TRUE, col = "red", lwd = 2)

# The wide (lwd = 10) legend line stands in for the histogram bars.
legend("topright", bty = "n",
       legend = c("Datos observados", "Curva Normal teórica"),
       col = c("lightblue", "red"), lwd = c(10, 2))

#  NORMALITY TESTS
# Shapiro-Wilk, plus a one-sample Kolmogorov-Smirnov test against a normal
# distribution whose mean and sd are the sample estimates media_long / sd_long.
# NOTE(review): the KS reference parameters are estimated from the same data
# and the sample contains ties (see the recorded warning below), so the KS
# p-value should be treated as indicative rather than exact.


shapiro_result_long <- shapiro.test(longitud)
ks_result_long <- ks.test(longitud, "pnorm", mean = media_long, sd = sd_long)

## Warning in ks.test.default(longitud, "pnorm", mean = media_long, sd = sd_long):
## ties should not be present for the one-sample Kolmogorov-Smirnov test

# Report the sample moments that were used as the normal parameters.
cat("Media:", media_long, "\n")

## Media: -95.52498

cat("Desviación estándar:", sd_long, "\n\n")

## Desviación estándar: 40.06383

cat("---- PRUEBAS DE NORMALIDAD ----\n")

## ---- PRUEBAS DE NORMALIDAD ----

# Print the full test objects (statistic and p-value) for both tests.
print(shapiro_result_long)

## 
##  Shapiro-Wilk normality test
## 
## data:  longitud
## W = 0.73917, p-value < 2.2e-16

print(ks_result_long)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  longitud
## D = 0.19947, p-value < 2.2e-16
## alternative hypothesis: two-sided

# Normal Q-Q plot of the longitudes with its reference line.
qqnorm(longitud, main = "QQ Plot de la variable Longitud")
qqline(longitud, col = "red", lwd = 2)

# Normality is accepted only when both tests fail to reject at the 5% level.
# Each p-value is a single number, so the scalar, short-circuiting `&&` is the
# right operator for an `if` condition (`&` is the elementwise form).
if (shapiro_result_long$p.value > 0.05 && ks_result_long$p.value > 0.05) {
  cat("\nConclusión: Los datos de Longitud se ajustan a una distribución Normal (p > 0.05).\n")
} else {
  # The message is passed as two pieces joined with sep = "" so the line break
  # and indentation of the printed text are explicit instead of depending on a
  # string literal that spans two source lines. The printed text is unchanged.
  cat("\nConclusión: Los datos de Longitud no siguen una distribución Normal (p < 0.05). \n",
      "  Se recomienda evaluar otros modelos (Log-normal, Gamma, Exponencial, etc.).\n",
      sep = "")
}

## 
## Conclusión: Los datos de Longitud no siguen una distribución Normal (p < 0.05). 
##   Se recomienda evaluar otros modelos (Log-normal, Gamma, Exponencial, etc.).

#  DISTRIBUTION FITTING AND VISUAL COMPARISON

# Model fitting
# The normal model is fitted to the raw longitudes.
ajuste_norm <- fitdist(longitud, "norm")

# Log-normal and exponential need strictly positive data, so the sample is
# shifted until its minimum equals 1 before fitting those two models.
longitud_pos <- longitud - min(longitud) + 1
ajuste_exp <- fitdist(longitud_pos, "exp")
ajuste_lognorm <- fitdist(longitud_pos, "lnorm")

# AIC comparison: one row per candidate model.
nombres_modelos_long <- c("Normal", "Log-normal (ajustada)", "Exponencial (ajustada)")
valores_aic_long <- c(ajuste_norm$aic, ajuste_lognorm$aic, ajuste_exp$aic)
comparacion_long <- data.frame(Modelo = nombres_modelos_long,
                               AIC = valores_aic_long)

cat("\n---- COMPARACIÓN DE MODELOS ----\n")

## 
## ---- COMPARACIÓN DE MODELOS ----

print(comparacion_long)

##                   Modelo      AIC
## 1                 Normal 36279.83
## 2  Log-normal (ajustada) 36153.02
## 3 Exponencial (ajustada) 39975.76

# Pick the model in the row with the smallest AIC.
fila_minima_long <- which.min(comparacion_long$AIC)
mejor_modelo_long <- comparacion_long$Modelo[fila_minima_long]
cat("\n✅ El modelo con mejor ajuste (menor AIC) es:", mejor_modelo_long, "\n")

## 
## ✅ El modelo con mejor ajuste (menor AIC) es: Log-normal (ajustada)

# ================================================================
#  COMBINED PLOT OF THE FITTED DISTRIBUTIONS
# ================================================================

# Base histogram on the density scale.
hist(longitud, breaks = 20, freq = FALSE,
     col = "lightblue", border = "gray",
     main = "Comparación de Distribuciones Ajustadas a Longitud",
     xlab = "Longitud")

# Normal curve.
curve(dnorm(x, mean = mean(longitud), sd = sd(longitud)),
      add = TRUE, col = "red", lwd = 2)

# Log-normal curve: x is shifted exactly as the data were shifted before the
# fit, so the density is evaluated on the scale the model was estimated on.
curve(dlnorm(x - min(longitud) + 1,
             sdlog = ajuste_lognorm$estimate["sdlog"],
             meanlog = ajuste_lognorm$estimate["meanlog"]),
      add = TRUE, col = "darkgreen", lwd = 2)

# Exponential curve, evaluated on the same shifted scale.
curve(dexp(x - min(longitud) + 1, rate = ajuste_exp$estimate["rate"]),
      add = TRUE, col = "purple", lwd = 2)

# Legend: one entry per fitted curve.
legend("topright", bty = "n", lwd = 2,
       legend = c("Normal", "Log-normal (ajustada)", "Exponencial (ajustada)"),
       col = c("red", "darkgreen", "purple"))

3 VARIABLE: Maximo_liberacion_galones

library(fitdistrplus)

# Maximum release volume (gallons), dropping the records with no value.
Maximo_liberacion_galones <- na.omit(datos$Maximo_liberacion_galones)

# RANGE 1: 0 to 40 000
# Keep strictly positive values up to 40 000. A single mask selects the same
# observations as filtering on [0, 40000] first and then discarding zeros.
en_rango1 <- Maximo_liberacion_galones > 0 & Maximo_liberacion_galones <= 40000
rango1 <- Maximo_liberacion_galones[en_rango1]

# Gamma model fitted by moment matching (method = "mme"); the original author
# chose it as the more stable option.
ajuste_gamma <- fitdist(rango1, "gamma", method = "mme")

# Visual check: density-scale histogram with the fitted gamma density on top.
hist(rango1, freq = FALSE, breaks = 20,
     main = "Ajuste de Distribución Gamma (Rango 1)",
     xlab = "Máximo de liberación (galones)",
     col = "lightblue", border = "white")
curve(dgamma(x,
             shape = ajuste_gamma$estimate["shape"],
             rate = ajuste_gamma$estimate["rate"]),
      add = TRUE, col = "red", lwd = 2)
legend("topright", lwd = 2,
       legend = c("Datos", "Gamma"),
       col = c("lightblue", "red"))

# Fit evaluation: print the fitdist summary (estimates, standard errors,
# log-likelihood, AIC/BIC) for the gamma model of range 1.
cat("\n=== AJUSTE GAMMA (RANGO 1) ===\n")

## 
## === AJUSTE GAMMA (RANGO 1) ===

print(summary(ajuste_gamma))

## Fitting of the distribution ' gamma ' by matching moments 
## Parameters : 
##           estimate   Std. Error
## shape 3.911944e-01 2.552977e-02
## rate  7.759609e-05 5.904298e-06
## Loglikelihood:  -15490.23   AIC:  30984.47   BIC:  30995.31 
## Correlation matrix:
##          shape     rate
## shape 1.000000 0.857681
## rate  0.857681 1.000000

# Estimated shape and rate on their own.
cat("\nEstimaciones Gamma:\n")

## 
## Estimaciones Gamma:

print(ajuste_gamma$estimate)

##        shape         rate 
## 3.911944e-01 7.759609e-05

# Goodness-of-fit test: one-sample Kolmogorov-Smirnov against the gamma
# distribution with the estimated shape and rate.
# NOTE(review): the parameters come from the same sample and the data contain
# ties (see the recorded warning), so the p-value is only indicative. A
# p-value below 0.05 means the fitted gamma is rejected.
ks_test_r1 <- ks.test(rango1, "pgamma",
                      shape = ajuste_gamma$estimate["shape"],
                      rate = ajuste_gamma$estimate["rate"])

## Warning in ks.test.default(rango1, "pgamma", shape =
## ajuste_gamma$estimate["shape"], : ties should not be present for the one-sample
## Kolmogorov-Smirnov test

cat("\nKolmogorov–Smirnov (Rango 1):\n")

## 
## Kolmogorov–Smirnov (Rango 1):

print(ks_test_r1)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  rango1
## D = 0.082803, p-value = 2.268e-10
## alternative hypothesis: two-sided

# RANGE 2: 40 001 to 336 000 009
# Every value selected here is at least 40 001, so the sample is already
# strictly positive and no separate positivity filter is needed.
en_rango2 <- Maximo_liberacion_galones >= 40001 &
  Maximo_liberacion_galones <= 336000009
rango2 <- Maximo_liberacion_galones[en_rango2]

# Log-normal model (fitdist default estimation method).
ajuste_lognorm <- fitdist(rango2, "lnorm")

# Visual check: density-scale histogram with the fitted log-normal density.
hist(rango2, freq = FALSE, breaks = 20,
     main = "Ajuste de Distribución Log-normal (Rango 2)",
     xlab = "Máximo de liberación (galones)",
     col = "thistle", border = "white")
curve(dlnorm(x,
             sdlog = ajuste_lognorm$estimate["sdlog"],
             meanlog = ajuste_lognorm$estimate["meanlog"]),
      add = TRUE, col = "darkgreen", lwd = 2)
legend("topright", lwd = 2,
       legend = c("Datos", "Log-normal"),
       col = c("thistle", "darkgreen"))

# Fit evaluation: print the fitdist summary (estimates, standard errors,
# log-likelihood, AIC/BIC) for the log-normal model of range 2.
cat("\n=== AJUSTE LOG-NORMAL (RANGO 2) ===\n")

## 
## === AJUSTE LOG-NORMAL (RANGO 2) ===

print(summary(ajuste_lognorm))

## Fitting of the distribution ' lnorm ' by maximum likelihood 
## Parameters : 
##          estimate Std. Error
## meanlog 12.960307 0.08224032
## sdlog    1.803671 0.05815261
## Loglikelihood:  -7200.123   AIC:  14404.25   BIC:  14412.6 
## Correlation matrix:
##         meanlog sdlog
## meanlog       1     0
## sdlog         0     1

# Estimated meanlog and sdlog on their own.
cat("\nEstimaciones Log-normal:\n")

## 
## Estimaciones Log-normal:

print(ajuste_lognorm$estimate)

##   meanlog     sdlog 
## 12.960307  1.803671

# Goodness-of-fit test: one-sample Kolmogorov-Smirnov against the log-normal
# distribution with the estimated meanlog and sdlog.
# NOTE(review): the parameters come from the same sample and the data contain
# ties (see the recorded warning), so the p-value is only indicative. A
# p-value below 0.05 means the fitted log-normal is rejected.
ks_test_r2 <- ks.test(rango2, "plnorm",
                      meanlog = ajuste_lognorm$estimate["meanlog"],
                      sdlog = ajuste_lognorm$estimate["sdlog"])

## Warning in ks.test.default(rango2, "plnorm", meanlog =
## ajuste_lognorm$estimate["meanlog"], : ties should not be present for the
## one-sample Kolmogorov-Smirnov test

cat("\nKolmogorov–Smirnov (Rango 2):\n")

## 
## Kolmogorov–Smirnov (Rango 2):

print(ks_test_r2)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  rango2
## D = 0.10647, p-value = 3.672e-05
## alternative hypothesis: two-sided

# CONCLUSION
# Prints the closing summary for Maximo_liberacion_galones. The statement
# about the Kolmogorov-Smirnov tests is derived from the stored p-values
# instead of being hard-coded: the recorded results (p = 2.268e-10 for range 1
# and p = 3.672e-05 for range 2) reject both fits at the 5% level, which the
# previous fixed text ("ajuste aceptable") contradicted.

cat("------------------------------------------------------------\n")

## ------------------------------------------------------------

cat("• En el Rango 1 (0–40 000 galones) los datos se ajustan mejor a una DISTRIBUCIÓN GAMMA.\n")

## • En el Rango 1 (0–40 000 galones) los datos se ajustan mejor a una DISTRIBUCIÓN GAMMA.

cat("  Esto confirma la naturaleza positiva y asimétrica del derrame en pequeños eventos.\n")

##   Esto confirma la naturaleza positiva y asimétrica del derrame en pequeños eventos.

cat("• En el Rango 2 (40 001–336 000 009 galones) el mejor modelo es la DISTRIBUCIÓN LOG-NORMAL,\n")

## • En el Rango 2 (40 001–336 000 009 galones) el mejor modelo es la DISTRIBUCIÓN LOG-NORMAL,

cat("  representando eventos excepcionales con magnitud extrema y gran dispersión.\n")

##   representando eventos excepcionales con magnitud extrema y gran dispersión.

# The fit is called acceptable only when neither KS test rejects at 5%.
if (ks_test_r1$p.value > 0.05 && ks_test_r2$p.value > 0.05) {
  cat("• En ambos casos, las pruebas KS muestran un ajuste aceptable, validando el modelo elegido.\n")
} else {
  cat("• Las pruebas KS rechazan el ajuste en al menos un rango (p < 0.05): los modelos son aproximaciones descriptivas, no validadas.\n")
}

## • Las pruebas KS rechazan el ajuste en al menos un rango (p < 0.05): los modelos son aproximaciones descriptivas, no validadas.

cat("------------------------------------------------------------\n")

## ------------------------------------------------------------

4 VARIABLE: Respuesta_actual_galones

library(fitdistrplus)

# Response volume (gallons). The column is read as text; entries that cannot
# be parsed as numbers become NA (see the recorded warning) and are dropped.
# NOTE(review): which entries fail to parse is not visible here -- confirm
# they are truly missing values and not numbers in another format.
Respuesta_actual_galones <- as.numeric(datos$Respuesta_actual_galones)

## Warning: NAs introduced by coercion

Respuesta_actual_galones <- na.omit(Respuesta_actual_galones)

# RANGE 1: 0 - 10 000 gallons
# Keep strictly positive values up to 10 000. A single mask selects the same
# observations as filtering on [0, 10000] first and then discarding zeros.
en_rango1 <- Respuesta_actual_galones > 0 & Respuesta_actual_galones <= 10000
rango1 <- Respuesta_actual_galones[en_rango1]

# Gamma model fitted by moment matching (method = "mme"); the original author
# chose it as the more stable option.
ajuste_gamma_r1 <- fitdist(rango1, "gamma", method = "mme")

# Visual check: density-scale histogram with the fitted gamma density on top.
hist(rango1, freq = FALSE, breaks = 20,
     main = "Ajuste de Distribución Gamma (Rango 1: 0–10 000 galones)",
     xlab = "Cantidad recolectada (galones)",
     col = "plum", border = "white")
curve(dgamma(x,
             rate = ajuste_gamma_r1$estimate["rate"],
             shape = ajuste_gamma_r1$estimate["shape"]),
      add = TRUE, col = "darkred", lwd = 2)
legend("topright", lwd = 2,
       legend = c("Datos", "Gamma ajustada"),
       col = c("plum", "darkred"))

# Goodness-of-fit test: one-sample Kolmogorov-Smirnov against the gamma
# distribution with the estimated shape and rate.
# NOTE(review): the parameters come from the same sample and the data contain
# ties (see the recorded warning), so the p-value is only indicative. A
# p-value below 0.05 means the fitted gamma is rejected.
ks_r1 <- ks.test(rango1, "pgamma",
                 shape = ajuste_gamma_r1$estimate["shape"],
                 rate  = ajuste_gamma_r1$estimate["rate"])

## Warning in ks.test.default(rango1, "pgamma", shape =
## ajuste_gamma_r1$estimate["shape"], : ties should not be present for the
## one-sample Kolmogorov-Smirnov test

# Print the fitdist summary (estimates, standard errors, log-likelihood,
# AIC/BIC) for the gamma model of range 1.
cat("\n=== AJUSTE GAMMA (RANGO 1) ===\n")

## 
## === AJUSTE GAMMA (RANGO 1) ===

print(summary(ajuste_gamma_r1))

## Fitting of the distribution ' gamma ' by matching moments 
## Parameters : 
##           estimate   Std. Error
## shape 0.6444860287 4.691629e-02
## rate  0.0003191885 2.653408e-05
## Loglikelihood:  -8183.283   AIC:  16370.57   BIC:  16380.31 
## Correlation matrix:
##           shape      rate
## shape 1.0000000 0.8756962
## rate  0.8756962 1.0000000

# Print the KS statistic and p-value.
cat("\nKolmogorov–Smirnov Test:\n")

## 
## Kolmogorov–Smirnov Test:

print(ks_r1)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  rango1
## D = 0.085098, p-value = 1.753e-06
## alternative hypothesis: two-sided

# RANGE 2: 10 001 - 336 000 000 gallons
# Every value selected here is at least 10 001, so the sample is already
# strictly positive and no separate positivity filter is needed.
en_rango2 <- Respuesta_actual_galones >= 10001 &
  Respuesta_actual_galones <= 336000000
rango2 <- Respuesta_actual_galones[en_rango2]

# Log-normal model; the original author chose it as more robust to outliers.
ajuste_lognorm_r2 <- fitdist(rango2, "lnorm")

# Visual check: density-scale histogram with the fitted log-normal density.
hist(rango2, freq = FALSE, breaks = 25,
     main = "Ajuste de Distribución Log-normal (Rango 2: 10 001–336 000 000 galones)",
     xlab = "Cantidad recolectada (galones)",
     col = "lightblue", border = "white")
curve(dlnorm(x,
             sdlog   = ajuste_lognorm_r2$estimate["sdlog"],
             meanlog = ajuste_lognorm_r2$estimate["meanlog"]),
      add = TRUE, col = "darkgreen", lwd = 2)
legend("topright", lwd = 2,
       legend = c("Datos", "Log-normal ajustada"),
       col = c("lightblue", "darkgreen"))

# Goodness-of-fit test: one-sample Kolmogorov-Smirnov against the log-normal
# distribution with the estimated meanlog and sdlog.
# NOTE(review): the parameters come from the same sample and the data contain
# ties (see the recorded warning), so the p-value is only indicative. A
# p-value below 0.05 means the fitted log-normal is rejected.
ks_r2 <- ks.test(rango2, "plnorm",
                 meanlog = ajuste_lognorm_r2$estimate["meanlog"],
                 sdlog   = ajuste_lognorm_r2$estimate["sdlog"])

## Warning in ks.test.default(rango2, "plnorm", meanlog =
## ajuste_lognorm_r2$estimate["meanlog"], : ties should not be present for the
## one-sample Kolmogorov-Smirnov test

# Print the fitdist summary (estimates, standard errors, log-likelihood,
# AIC/BIC) for the log-normal model of range 2.
cat("\n=== AJUSTE LOG-NORMAL (RANGO 2) ===\n")

## 
## === AJUSTE LOG-NORMAL (RANGO 2) ===

print(summary(ajuste_lognorm_r2))

## Fitting of the distribution ' lnorm ' by maximum likelihood 
## Parameters : 
##         estimate Std. Error
## meanlog 11.79359 0.09757690
## sdlog    2.04679 0.06899721
## Loglikelihood:  -6128.672   AIC:  12261.34   BIC:  12269.52 
## Correlation matrix:
##         meanlog sdlog
## meanlog       1     0
## sdlog         0     1

# Print the KS statistic and p-value.
cat("\nKolmogorov–Smirnov Test:\n")

## 
## Kolmogorov–Smirnov Test:

print(ks_r2)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  rango2
## D = 0.1132, p-value = 2.533e-05
## alternative hypothesis: two-sided

# CONCLUSION
# Prints the closing summary for Respuesta_actual_galones. The statement about
# the Kolmogorov-Smirnov tests is derived from the stored p-values instead of
# being hard-coded: the recorded results (p = 1.753e-06 for range 1 and
# p = 2.533e-05 for range 2) reject both fits at the 5% level, which the
# previous fixed text ("ajuste razonable") contradicted.

cat("------------------------------------------------------------\n")

## ------------------------------------------------------------

cat("• En el Rango 1 (0–10 000 galones), los datos se ajustan mejor a una DISTRIBUCIÓN GAMMA,\n")

## • En el Rango 1 (0–10 000 galones), los datos se ajustan mejor a una DISTRIBUCIÓN GAMMA,

cat("  lo que sugiere eventos frecuentes con valores pequeños y variabilidad moderada.\n")

##   lo que sugiere eventos frecuentes con valores pequeños y variabilidad moderada.

cat("• En el Rango 2 (10 001–336 000 000 galones), los datos presentan una asimetría marcada\n")

## • En el Rango 2 (10 001–336 000 000 galones), los datos presentan una asimetría marcada

cat("  y un comportamiento mejor representado por una DISTRIBUCIÓN LOG-NORMAL.\n")

##   y un comportamiento mejor representado por una DISTRIBUCIÓN LOG-NORMAL.

# The fit is called reasonable only when neither KS test rejects at 5%.
if (ks_r1$p.value > 0.05 && ks_r2$p.value > 0.05) {
  cat("• Las pruebas Kolmogorov–Smirnov confirman un ajuste razonable en ambos casos.\n")
} else {
  cat("• Las pruebas Kolmogorov–Smirnov rechazan el ajuste en al menos un rango (p < 0.05).\n")
}

## • Las pruebas Kolmogorov–Smirnov rechazan el ajuste en al menos un rango (p < 0.05).

cat("------------------------------------------------------------\n")

## ------------------------------------------------------------

Estadística Inferencial: Variables Continuas

Grupo No. 5

2025-10-19

1 VARIABLE: LATITUD

2 VARIABLE: LONGITUD

3 VARIABLE: Maximo_liberacion_galones

4 VARIABLE: Respuesta_actual_galones