VARIABLE GRAVEDAD DE LESIÓN
severity <- as.numeric(datos$INJURY_SEVERITY_SCORE)
severity <- na.omit(severity)
histograma_sev <- hist(severity,
main = "Gráfica Nº1: Distribución del índice de severidad
de accidentes mineros (MSHA)",
xlab = "INJURY_SEVERITY_SCORE",
ylab = "Cantidad de accidentes",
col = "gray")

lis <- histograma_sev$breaks[-length(histograma_sev$breaks)]
lss <- histograma_sev$breaks[-1]
MC_f <- histograma_sev$mids
ni_f <- histograma_sev$counts
hi_f <- (ni_f / sum(ni_f)) * 100
TDFlat_f <- round(data.frame(
lis, lss, MC_f, ni_f, hi_f
), 2)
fila_total_f <- data.frame(
lis = "TOTAL",
lss = "",
MC_f = "",
ni_f = sum(ni_f),
hi_f = round(sum(hi_f), 2)
)
TDFlat_t <- rbind(TDFlat_f, fila_total_f)
tabla_sev <- TDFlat_t %>%
gt() %>%
tab_header(
title = md("*Tabla Nº2*"),
subtitle = md("Distribución del índice de severidad de accidentes mineros")
) %>%
tab_source_note(
source_note = md("Autor: Junert Núñez")
)
tabla_sev
| Tabla Nº2 |
| Distribución del índice de severidad de accidentes mineros |
| lis |
lss |
MC_f |
ni_f |
hi_f |
| 10 |
15 |
12.5 |
2 |
0.04 |
| 15 |
20 |
17.5 |
4 |
0.08 |
| 20 |
25 |
22.5 |
22 |
0.45 |
| 25 |
30 |
27.5 |
72 |
1.46 |
| 30 |
35 |
32.5 |
216 |
4.39 |
| 35 |
40 |
37.5 |
422 |
8.57 |
| 40 |
45 |
42.5 |
724 |
14.70 |
| 45 |
50 |
47.5 |
943 |
19.15 |
| 50 |
55 |
52.5 |
959 |
19.47 |
| 55 |
60 |
57.5 |
766 |
15.55 |
| 60 |
65 |
62.5 |
461 |
9.36 |
| 65 |
70 |
67.5 |
223 |
4.53 |
| 70 |
75 |
72.5 |
86 |
1.75 |
| 75 |
80 |
77.5 |
22 |
0.45 |
| 80 |
85 |
82.5 |
2 |
0.04 |
| 85 |
90 |
87.5 |
1 |
0.02 |
| TOTAL |
|
|
4925 |
100.00 |
| Autor: Junert Núñez |
u_1 <- mean(severity)
sigma_1 <- sd(severity)
n1 <- length(severity)
hist(severity,
freq = FALSE,
breaks = 20,
main = "Gráfica Nº2: Ajuste del modelo normal al índice de severidad",
xlab = "INJURY_SEVERITY_SCORE",
col = "lightgray",
border = "black")
x <- seq(min(severity), max(severity), 0.01)
curve(dnorm(x, u_1, sigma_1), col = "blue", lwd = 2, add = TRUE)

Fo_1 <- histograma_sev$counts
P1 <- numeric(length(Fo_1))
for (i in 1:length(Fo_1)) {
P1[i] <- pnorm(histograma_sev$breaks[i+1], u_1, sigma_1) -
pnorm(histograma_sev$breaks[i], u_1, sigma_1)
}
Fe_1 <- P1 * n1
Fo_1 <- (Fo_1 / n1) * 100
Fe_1 <- (Fe_1 / n1) * 100
plot(Fo_1, Fe_1,
main = "Gráfica Nº3: Correlación de frecuencias observadas y esperadas",
xlab = "Frecuencia observada (%)",
ylab = "Frecuencia esperada (%)",
col = "blue3")
abline(lm(Fe_1 ~ Fo_1), col = "red", lwd = 2)

CorrelaciOn_1 <- cor(Fo_1, Fe_1) * 100
x2_1 <- sum((Fe_1 - Fo_1)^2 / Fe_1)
grados_libertad_1 <- length(Fo_1) - 1
umbral_aceptacion_1 <- qchisq(0.95, grados_libertad_1)
x2_1 < umbral_aceptacion_1
## [1] TRUE
Variable <- c("INJURY_SEVERITY_SCORE")
tabla_resumen <- data.frame(
Variable,
round(CorrelaciOn_1, 2),
round(x2_1, 2),
round(umbral_aceptacion_1, 2)
)
colnames(tabla_resumen) <- c(
"Variable",
"Test Pearson (%)",
"Chi Cuadrado",
"Umbral de aceptación"
)
kable(tabla_resumen,
format = "markdown",
caption = "Tabla resumen del test de bondad de ajuste al modelo normal")
Tabla resumen del test de bondad de ajuste al modelo
normal
| INJURY_SEVERITY_SCORE |
99.98 |
0.13 |
25 |
Probabilidad_1 <- (pnorm(60, u_1, sigma_1) -
pnorm(40, u_1, sigma_1)) * 100
Probabilidad_1
## [1] 68.9316
plot(x, dnorm(x, u_1, sigma_1),
col = "skyblue3",
lwd = 2,
main = "Gráfica Nº4: Cálculo de probabilidades",
ylab = "Densidad de probabilidad",
xlab = "INJURY_SEVERITY_SCORE")
x_section <- seq(40, 60, 0.01)
y_section <- dnorm(x_section, u_1, sigma_1)
lines(x_section, y_section, col = "red", lwd = 2)
polygon(c(x_section, rev(x_section)),
c(y_section, rep(0, length(y_section))),
col = rgb(1, 0, 0, 0.6))
legend("topright",
legend = c("Modelo Normal", "Área de Probabilidad"),
col = c("skyblue3", "red"),
lwd = 2,
cex = 0.7)
text(min(x),
max(dnorm(x, u_1, sigma_1)) * 0.9,
paste0("Probabilidad = ",
round(Probabilidad_1, 2), "%"),
cex = 0.8,
font = 2)

severity_2 <- severity[severity >= mean(severity)]
Histograma_2 <- hist(severity_2,
freq = FALSE,
breaks = 20,
main = "Gráfica Nº5: Comparación de la realidad con el modelo normal
del índice de severidad de accidentes (Modelo 2)",
xlab = "INJURY_SEVERITY_SCORE",
ylab = "Densidad de probabilidad",
col = "lightgray",
border = "black")
severity_ln <- log(severity_2)
u_2 <- mean(severity_ln)
sigma_2 <- sd(severity_ln)
x <- seq(min(severity_2), max(severity_2), 0.01)
curve(dlnorm(x, meanlog = u_2, sdlog = sigma_2),
col = "darkgreen",
lwd = 2,
add = TRUE)

h2 <- length(Histograma_2$counts)
n2 <- length(severity_2)
Fo_2 <- Histograma_2$counts
P2 <- numeric(h2)
for (i in 1:h2) {
P2[i] <- pnorm(Histograma_2$breaks[i+1], u_2, sigma_2) -
pnorm(Histograma_2$breaks[i], u_2, sigma_2)
}
Fe_2 <- P2 * n2
Fo_2 <- (Fo_2 / n2) * 100
Fe_2 <- (Fe_2 / n2) * 100
plot(Fo_2, Fe_2,
main = "Gráfica Nº6: Correlación de frecuencias
(Modelo Normal 2 – Severidad)",
xlab = "Frecuencia observada (%)",
ylab = "Frecuencia esperada (%)",
col = "blue3")
abline(lm(Fe_2 ~ Fo_2), col = "red", lwd = 2)

Correlacion_2 <- cor(Fo_2, Fe_2) * 100
## Warning in cor(Fo_2, Fe_2): the standard deviation is zero
Correlacion_2
## [1] NA
grados_libertad_2 <- length(Fo_2) - 1
nivel_significancia <- 0.99
x2_2 <- sum((Fe_2 - Fo_2)^2 / Fe_2)
umbral_aceptacion_2 <- qchisq(nivel_significancia, grados_libertad_2)
x2_2 < umbral_aceptacion_2
## [1] NA
Variable <- c("INJURY_SEVERITY_SCORE (Modelo 2)")
tabla_resumen_2 <- data.frame(
Variable,
round(Correlacion_2, 2),
round(x2_2, 2),
round(umbral_aceptacion_2, 2)
)
colnames(tabla_resumen_2) <- c(
"Variable",
"Test Pearson (%)",
"Chi Cuadrado",
"Umbral de aceptación"
)
kable(tabla_resumen_2,
format = "markdown",
caption = "Tabla resumen del test de bondad – Modelo Normal 2")
Tabla resumen del test de bondad – Modelo Normal 2
| INJURY_SEVERITY_SCORE (Modelo 2) |
NA |
NaN |
33.41 |
Probabilidad_2 <- (pnorm(80, u_2, sigma_2) -
pnorm(60, u_2, sigma_2)) * 100
Probabilidad_2
## [1] 0
plot(x, dnorm(x, u_2, sigma_2),
col = "skyblue3",
lwd = 2,
main = "Gráfica Nº7: Cálculo de probabilidades
(Severidad alta)",
ylab = "Densidad de probabilidad",
xlab = "INJURY_SEVERITY_SCORE")
x_section_2 <- seq(60, 80, 0.01)
y_section_2 <- dnorm(x_section_2, u_2, sigma_2)
lines(x_section_2, y_section_2, col = "red", lwd = 2)
polygon(c(x_section_2, rev(x_section_2)),
c(y_section_2, rep(0, length(y_section_2))),
col = rgb(1, 0, 0, 0.6))
legend("topright",
legend = c("Modelo Normal", "Área de Probabilidad"),
col = c("skyblue3", "red"),
lwd = 2,
cex = 0.6)
text(min(x),
max(dnorm(x, u_2, sigma_2)) * 0.9,
paste0("Probabilidad = ",
round(Probabilidad_2, 2), "%"),
cex = 0.8,
font = 2)

# Teorema del Límite Central
# Variable: INJURY_SEVERITY_SCORE
# Media aritmética
x <- mean(severity)
x
## [1] 50.22578
# Desviación estándar
sigma <- sd(severity)
sigma
## [1] 9.861413
# Tamaño muestral
n <- length(severity)
n
## [1] 4925
# Error estándar (sigma / sqrt(n))
e <- sigma / sqrt(n)
e
## [1] 0.1405193
# Intervalo de confianza aproximado al 95%
li <- x - 2 * e
li
## [1] 49.94474
ls <- x + 2 * e
ls
## [1] 50.50681
# Tabla resumen TLC
Variable <- "INJURY_SEVERITY_SCORE"
tabla_media <- data.frame(
round(li, 2),
Variable,
round(ls, 2),
round(e, 4)
)
colnames(tabla_media) <- c(
"Límite inferior",
"Media poblacional",
"Límite superior",
"Error estándar poblacional"
)
library(knitr)
kable(
tabla_media,
format = "markdown",
caption = "Tabla Nro. 3: Intervalo de confianza para la media poblacional según el Teorema del Límite Central"
)
Tabla Nro. 3: Intervalo de confianza para la media poblacional
según el Teorema del Límite Central
| 49.94 |
INJURY_SEVERITY_SCORE |
50.51 |
0.1405 |