# Load the accident records. check.names = FALSE keeps the original column
# headers (which contain spaces) instead of converting them to syntactic names.
database <- read.csv("database-_1_.csv", header = TRUE, sep = ",", dec = ".", check.names = FALSE)

# Fail early if the column is absent: `$` on a missing column returns NULL
# silently, which would only surface later as Inf/-Inf limits.
if (!("Accident Longitude" %in% names(database))) {
  stop("Column 'Accident Longitude' not found in database-_1_.csv", call. = FALSE)
}

# Coerce to numeric (going through character so factor columns are converted
# by label, not by internal code) and drop the values that could not be parsed.
variable_original <- database[["Accident Longitude"]]
variable_original <- as.numeric(as.character(variable_original))
variable_original <- na.omit(variable_original)

# Every later step (Sturges' rule, min/max, histograms) needs at least one value.
if (length(variable_original) == 0) {
  stop("Column 'Accident Longitude' has no valid numeric values", call. = FALSE)
}

# NOTE: `amperaje` holds the longitude values; the name is kept because the
# rest of the script refers to it.
amperaje <- variable_original

cat("Datos cargados correctamente. Total:", length(amperaje))
## Datos cargados correctamente. Total: 2795
# Report the smallest and largest observed values on a new output line.
extremos <- range(amperaje)
cat("\nMínimo:", extremos[1], "| Máximo:", extremos[2])
## 
## Mínimo: -158.0999 | Máximo: 104.2634
# Frequency distribution table for `amperaje` using Sturges' rule.

# 1. Sturges' rule: number of classes from the sample size (rounded down).
k <- 1 + (3.322 * log10(length(amperaje)))
k <- floor(k)

# 2. Range and class width
min_val <- min(amperaje)
max_val <- max(amperaje)
R <- max_val - min_val
A <- R / k

# 3. Class limits
# Build the k + 1 break points in one call. With `length.out` the first and
# last breaks are exactly min_val and max_val, so there are always exactly k
# classes and no floating-point drift to patch up afterwards.
limites <- seq(min_val, max_val, length.out = k + 1)
Li <- limites[-length(limites)]
Ls <- limites[-1]
MC <- (Li + Ls) / 2

# 4. Absolute frequencies (ni)
# Classes are [Li, Ls) except the last one, which is closed on the right so
# the maximum is counted (rightmost.closed = TRUE). `nbins = k` keeps empty
# classes in the result with a count of zero.
clase <- findInterval(amperaje, limites, rightmost.closed = TRUE)
ni <- as.numeric(tabulate(clase, nbins = k))

# 5. Remaining columns
hi <- ni / sum(ni) * 100                      # relative frequency, in percent
Niasc <- cumsum(ni)                           # ascending cumulative count
Nidsc <- rev(cumsum(rev(ni)))                 # descending cumulative count
Hiasc <- round(cumsum(hi), 2)                 # ascending cumulative percent
Hidsc <- round(rev(cumsum(rev(hi))), 2)       # descending cumulative percent

TDFAmperaje <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)
# Table 1: render the frequency distribution as a gt table, built step by step.
tabla1_sturges <- gt(TDFAmperaje)

# Title and subtitle (markdown: italic title, bold subtitle)
tabla1_sturges <- tab_header(
  tabla1_sturges,
  title = md("*Tabla 1: Distribución de Frecuencias*"),
  subtitle = md("**Variable: Accident Longitude**")
)

# Human-readable column headers
tabla1_sturges <- cols_label(
  tabla1_sturges,
  Li = "L. Inferior",
  Ls = "L. Superior",
  MC = "Marca Clase",
  ni = "Frec. Abs.",
  hi = "Frec. Rel. %",
  Niasc = "Ni Asc.",
  Nidsc = "Ni Desc.",
  Hiasc = "Hi Asc. %",
  Hidsc = "Hi Desc. %"
)

# Class limits and midpoints with 4 decimals
tabla1_sturges <- fmt_number(
  tabla1_sturges,
  columns = c(Li, Ls, MC),
  decimals = 4
)

# Percentage columns with 2 decimals and a trailing percent sign
tabla1_sturges <- fmt_number(
  tabla1_sturges,
  columns = c(hi, Hiasc, Hidsc),
  decimals = 2,
  pattern = "{x}%"
)

tabla1_sturges
Tabla 1: Distribución de Frecuencias
Variable: Accident Longitude
L. Inferior L. Superior Marca Clase Frec. Abs. Frec. Rel. % Ni Asc. Ni Desc. Hi Asc. % Hi Desc. %
−158.0999 −136.2363 −147.1681 14 0.50% 14 2795 0.50% 100.00%
−136.2363 −114.3727 −125.3045 169 6.05% 183 2781 6.55% 99.50%
−114.3727 −92.5091 −103.4409 1814 64.90% 1997 2612 71.45% 93.45%
−92.5091 −70.6455 −81.5773 792 28.34% 2789 798 99.79% 28.55%
−70.6455 −48.7819 −59.7137 2 0.07% 2791 6 99.86% 0.21%
−48.7819 −26.9183 −37.8501 0 0.00% 2791 4 99.86% 0.14%
−26.9183 −5.0547 −15.9865 1 0.04% 2792 4 99.89% 0.14%
−5.0547 16.8090 5.8772 0 0.00% 2792 3 99.89% 0.11%
16.8090 38.6726 27.7408 0 0.00% 2792 3 99.89% 0.11%
38.6726 60.5362 49.6044 0 0.00% 2792 3 99.89% 0.11%
60.5362 82.3998 71.4680 0 0.00% 2792 3 99.89% 0.11%
82.3998 104.2634 93.3316 3 0.11% 2795 3 100.00% 0.11%
# Table 2: the frequency distribution plus a final "Total" row.

# Column totals shown in the last row
total_ni <- sum(TDFAmperaje$ni)
total_hi <- sum(TDFAmperaje$hi)

# Append the totals row. Columns that have no meaningful total get a numeric
# placeholder (0) so the column types stay numeric; the placeholder text is
# replaced when the table is rendered (see text_transform below).
TDF_Completo <- rbind(
  TDFAmperaje,
  data.frame(
    Li = 0, Ls = 0, MC = 0,
    ni = total_ni, hi = total_hi,
    Niasc = 0, Nidsc = 0, Hiasc = 0, Hidsc = 0
  )
)

tabla2_total <- TDF_Completo %>%
  gt() %>%
  tab_header(
    title = md("*Tabla 2: Distribución Completa*"),
    subtitle = md("**Con Totales**")
  ) %>%
  # Same headers as Table 1 so both tables read consistently
  cols_label(
    Li = "L. Inferior",
    Ls = "L. Superior",
    MC = "Marca Clase",
    ni = "Frec. Abs.",
    hi = "Frec. Rel. %",
    Niasc = "Ni Asc.",
    Nidsc = "Ni Desc.",
    Hiasc = "Hi Asc. %",
    Hidsc = "Hi Desc. %"
  ) %>%
  # Same number formats as Table 1. Formatting Li with 0 decimals would round
  # every lower class limit to an integer, not only the totals row.
  fmt_number(columns = c(Li, Ls, MC), decimals = 4) %>%
  fmt_number(columns = c(hi, Hiasc, Hidsc), decimals = 2, pattern = "{x}%") %>%
  # Totals row: label it in the first column...
  text_transform(
    locations = cells_body(columns = Li, rows = nrow(TDF_Completo)),
    fn = function(x) "Total"
  ) %>%
  # ...and blank out the placeholder cells that have no total.
  text_transform(
    locations = cells_body(columns = c(Ls, MC, Niasc, Nidsc, Hiasc, Hidsc), rows = nrow(TDF_Completo)),
    fn = function(x) ""
  )

tabla2_total
Tabla 2: Distribución Completa
Con Totales
Li Ls MC ni hi Niasc Nidsc Hiasc Hidsc
−158 -136.236319 -147.168125 14 0.50089445 14 2795 0.50 100.00
−136 -114.372709 -125.304514 169 6.04651163 183 2781 6.55 99.50
−114 -92.509098 -103.440903 1814 64.90161002 1997 2612 71.45 93.45
−93 -70.645487 -81.577292 792 28.33631485 2789 798 99.79 28.55
−71 -48.781876 -59.713682 2 0.07155635 2791 6 99.86 0.21
−49 -26.918265 -37.850071 0 0.00000000 2791 4 99.86 0.14
−27 -5.054655 -15.986460 1 0.03577818 2792 4 99.89 0.14
−5 16.808956 5.877151 0 0.00000000 2792 3 99.89 0.11
17 38.672567 27.740761 0 0.00000000 2792 3 99.89 0.11
39 60.536178 49.604372 0 0.00000000 2792 3 99.89 0.11
61 82.399788 71.467983 0 0.00000000 2792 3 99.89 0.11
82 104.263399 93.331594 3 0.10733453 2795 3 100.00 0.11
Total 2795 100.00000000
# Histogram with R's default (automatic) class selection
hist(
  amperaje,
  col = "lightgreen",
  las = 1,
  xlab = "Longitud",
  main = "Distribución de Longitud (Automático)"
)

# Histogram using the class limits from the frequency table: all lower limits
# plus the upper limit of the last class
hist(
  amperaje,
  breaks = c(Li, Ls[length(Ls)]),
  col = "darkgreen",
  xlab = "Longitud",
  ylab = "Frecuencia",
  main = "Frecuencia (Sturges)"
)

# Horizontal boxplot of the same variable
boxplot(
  amperaje,
  main = "Boxplot: Accident Longitude",
  col = "lightgreen",
  horizontal = TRUE
)