# Load the accident database and extract the "Accident Longitude" column as a
# clean numeric vector.
# check.names = FALSE keeps the header text as written in the file (including
# spaces), so the column is looked up by its literal name.
database <- read.csv(
  "database-_1_.csv",
  header = TRUE, sep = ",", dec = ".", check.names = FALSE
)

# `[[` requires an exact name match. `$` would fall back to partial matching
# or quietly return NULL when the column is missing, letting the script go on
# with an empty vector.
columna_objetivo <- "Accident Longitude"
if (!columna_objetivo %in% names(database)) {
  stop(
    "No se encontró la columna '", columna_objetivo, "' en el archivo.",
    call. = FALSE
  )
}
variable_original <- database[[columna_objetivo]]

# Converting through as.character() also handles a column that was read as a
# factor; entries that are not valid numbers become NA and are dropped next.
variable_original <- as.numeric(as.character(variable_original))
variable_original <- na.omit(variable_original)

# Fail here, with a clear message, instead of reporting a successful load of
# zero values.
if (length(variable_original) == 0) {
  stop(
    "La columna '", columna_objetivo, "' no contiene valores numéricos.",
    call. = FALSE
  )
}

# NOTE: the working vector is called `amperaje`, but it holds the longitude
# values extracted above; the name is kept because later statements use it.
amperaje <- variable_original
cat("Datos cargados correctamente. Total:", length(amperaje))
## Datos cargados correctamente. Total: 2795
# Print the smallest and largest value of the cleaned variable.
extremos <- range(amperaje)
cat("\nMínimo:", extremos[1], "| Máximo:", extremos[2])
##
## Mínimo: -158.0999 | Máximo: 104.2634
# Build the frequency distribution table (TDFAmperaje) for `amperaje`:
# class limits, class marks, absolute and relative frequencies, and their
# ascending / descending cumulative versions.
n_obs <- length(amperaje)
if (n_obs == 0) {
  stop("`amperaje` no contiene datos para construir la tabla.", call. = FALSE)
}

# 1. Sturges' rule: number of classes, truncated to a whole number.
k <- 1 + (3.322 * log10(n_obs))
k <- floor(k)

# 2. Range and class width.
min_val <- min(amperaje)
max_val <- max(amperaje)
R <- max_val - min_val
A <- R / k

# 3. Class limits.
# All limits come from a single vector of k + 1 break points whose first and
# last elements are exactly min_val and max_val. This guarantees exactly k
# classes and that the upper limit of one class is the same number as the
# lower limit of the next (no gaps or overlaps from floating-point rounding).
limites <- seq(from = min_val, to = max_val, length.out = k + 1)
Li <- limites[-(k + 1)]
Ls <- limites[-1]
MC <- (Li + Ls) / 2

# 4. Absolute frequencies (ni).
# Classes are left-closed [Li, Ls); rightmost.closed = TRUE makes the last
# class also include max_val. tabulate() with nbins = k keeps empty classes
# as zero counts. as.numeric() keeps the column type as double.
clase <- findInterval(amperaje, limites, rightmost.closed = TRUE)
ni <- as.numeric(tabulate(clase, nbins = k))

# 5. Remaining columns: relative frequency in percent and cumulative
# frequencies (ascending and descending), percentages rounded to 2 decimals.
hi <- ni / sum(ni) * 100
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- round(cumsum(hi), 2)
Hidsc <- round(rev(cumsum(rev(hi))), 2)
TDFAmperaje <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)
# Table 1: render the frequency table with gt, one step at a time.
tabla1_sturges <- gt(TDFAmperaje)

# Title in italics and subtitle in bold (both given as Markdown).
tabla1_sturges <- tab_header(
  tabla1_sturges,
  title = md("*Tabla 1: Distribución de Frecuencias*"),
  subtitle = md("**Variable: Accident Longitude**")
)

# Human-readable column headers.
tabla1_sturges <- cols_label(
  tabla1_sturges,
  Li = "L. Inferior",
  Ls = "L. Superior",
  MC = "Marca Clase",
  ni = "Frec. Abs.",
  hi = "Frec. Rel. %",
  Niasc = "Ni Asc.",
  Nidsc = "Ni Desc.",
  Hiasc = "Hi Asc. %",
  Hidsc = "Hi Desc. %"
)

# Limits and class marks with 4 decimals; percentage columns with 2 decimals
# followed by a percent sign.
tabla1_sturges <- fmt_number(
  tabla1_sturges,
  columns = c(Li, Ls, MC),
  decimals = 4
)
tabla1_sturges <- fmt_number(
  tabla1_sturges,
  columns = c(hi, Hiasc, Hidsc),
  decimals = 2,
  pattern = "{x}%"
)

# Display the table.
tabla1_sturges
Tabla 1: Distribución de Frecuencias
Variable: Accident Longitude

| L. Inferior | L. Superior | Marca Clase | Frec. Abs. | Frec. Rel. % | Ni Asc. | Ni Desc. | Hi Asc. % | Hi Desc. % |
|---|---|---|---|---|---|---|---|---|
| −158.0999 | −136.2363 | −147.1681 | 14 | 0.50% | 14 | 2795 | 0.50% | 100.00% |
| −136.2363 | −114.3727 | −125.3045 | 169 | 6.05% | 183 | 2781 | 6.55% | 99.50% |
| −114.3727 | −92.5091 | −103.4409 | 1814 | 64.90% | 1997 | 2612 | 71.45% | 93.45% |
| −92.5091 | −70.6455 | −81.5773 | 792 | 28.34% | 2789 | 798 | 99.79% | 28.55% |
| −70.6455 | −48.7819 | −59.7137 | 2 | 0.07% | 2791 | 6 | 99.86% | 0.21% |
| −48.7819 | −26.9183 | −37.8501 | 0 | 0.00% | 2791 | 4 | 99.86% | 0.14% |
| −26.9183 | −5.0547 | −15.9865 | 1 | 0.04% | 2792 | 4 | 99.89% | 0.14% |
| −5.0547 | 16.8090 | 5.8772 | 0 | 0.00% | 2792 | 3 | 99.89% | 0.11% |
| 16.8090 | 38.6726 | 27.7408 | 0 | 0.00% | 2792 | 3 | 99.89% | 0.11% |
| 38.6726 | 60.5362 | 49.6044 | 0 | 0.00% | 2792 | 3 | 99.89% | 0.11% |
| 60.5362 | 82.3998 | 71.4680 | 0 | 0.00% | 2792 | 3 | 99.89% | 0.11% |
| 82.3998 | 104.2634 | 93.3316 | 3 | 0.11% | 2795 | 3 | 100.00% | 0.11% |
# Table 2: the frequency table plus a final "Total" row.
# Only ni and hi have meaningful totals; every other cell of that row is a
# placeholder that is replaced by text when the table is rendered.
total_ni <- sum(TDFAmperaje$ni)
total_hi <- sum(TDFAmperaje$hi)

# The zeros are placeholders so the extra row has the same columns and types
# as the table; they are never shown (see the text_transform() calls below).
TDF_Completo <- rbind(
  TDFAmperaje,
  data.frame(
    Li = 0, Ls = 0, MC = 0,
    ni = total_ni, hi = total_hi,
    Niasc = 0, Nidsc = 0, Hiasc = 0, Hidsc = 0
  )
)

tabla2_total <- TDF_Completo %>%
  gt() %>%
  tab_header(
    title = md("*Tabla 2: Distribución Completa*"),
    subtitle = md("**Con Totales**")
  ) %>%
  # Same numeric precision as Table 1. Formatting Li with 0 decimals (as was
  # done before) is not needed for the "Total" label and rounded every lower
  # limit to an integer, so the limits no longer matched the Ls column.
  fmt_number(columns = c(Li, Ls, MC), decimals = 4) %>%
  fmt_number(columns = c(hi, Hiasc, Hidsc), decimals = 2) %>%
  # Replace the rendered text of the last row: "Total" as its label and
  # blanks where a total is not meaningful.
  text_transform(
    locations = cells_body(columns = Li, rows = nrow(TDF_Completo)),
    fn = function(x) "Total"
  ) %>%
  text_transform(
    locations = cells_body(
      columns = c(Ls, MC, Niasc, Nidsc, Hiasc, Hidsc),
      rows = nrow(TDF_Completo)
    ),
    fn = function(x) ""
  )

# Display the table.
tabla2_total
Tabla 2: Distribución Completa
Con Totales

| Li | Ls | MC | ni | hi | Niasc | Nidsc | Hiasc | Hidsc |
|---|---|---|---|---|---|---|---|---|
| −158 | -136.236319 | -147.168125 | 14 | 0.50089445 | 14 | 2795 | 0.50 | 100.00 |
| −136 | -114.372709 | -125.304514 | 169 | 6.04651163 | 183 | 2781 | 6.55 | 99.50 |
| −114 | -92.509098 | -103.440903 | 1814 | 64.90161002 | 1997 | 2612 | 71.45 | 93.45 |
| −93 | -70.645487 | -81.577292 | 792 | 28.33631485 | 2789 | 798 | 99.79 | 28.55 |
| −71 | -48.781876 | -59.713682 | 2 | 0.07155635 | 2791 | 6 | 99.86 | 0.21 |
| −49 | -26.918265 | -37.850071 | 0 | 0.00000000 | 2791 | 4 | 99.86 | 0.14 |
| −27 | -5.054655 | -15.986460 | 1 | 0.03577818 | 2792 | 4 | 99.89 | 0.14 |
| −5 | 16.808956 | 5.877151 | 0 | 0.00000000 | 2792 | 3 | 99.89 | 0.11 |
| 17 | 38.672567 | 27.740761 | 0 | 0.00000000 | 2792 | 3 | 99.89 | 0.11 |
| 39 | 60.536178 | 49.604372 | 0 | 0.00000000 | 2792 | 3 | 99.89 | 0.11 |
| 61 | 82.399788 | 71.467983 | 0 | 0.00000000 | 2792 | 3 | 99.89 | 0.11 |
| 82 | 104.263399 | 93.331594 | 3 | 0.10733453 | 2795 | 3 | 100.00 | 0.11 |
| Total |  |  | 2795 | 100.00000000 |  |  |  |  |
# Histogram of the variable with hist()'s default choice of bins (no `breaks`
# given); las = 1 draws the axis tick labels horizontally.
hist(
  amperaje,
  col = "lightgreen",
  las = 1,
  xlab = "Longitud",
  main = "Distribución de Longitud (Automático)"
)

# Histogram drawn on the class limits of the frequency table: every lower
# limit plus the upper limit of the last class.
# right = FALSE makes the bins left-closed [a, b), with the last bin also
# including its upper limit (include.lowest defaults to TRUE), which is the
# same rule used to count `ni` in the table. With hist()'s default
# right-closed bins, a value lying exactly on a limit would be drawn in a
# different class than the one it was tabulated in.
hist(amperaje,
  breaks = c(Li, tail(Ls, 1)),
  right = FALSE,
  main = "Frecuencia (Sturges)",
  col = "darkgreen",
  xlab = "Longitud", ylab = "Frecuencia")

# Horizontal box-and-whisker plot of the variable.
boxplot(
  amperaje,
  main = "Boxplot: Accident Longitude",
  col = "lightgreen",
  horizontal = TRUE
)
