CARGA DE LIBRERÍAS

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(gt)

CARGA DE DATOS

ruta <- "C:/Users/juner/OneDrive/Desktop/r/modelo_normal_msha_4925.xlsx"
datos <- read_excel(ruta)

VARIABLE GRAVEDAD DE LESIÓN

severity <- as.numeric(datos$INJURY_SEVERITY_SCORE)
severity <- na.omit(severity)
histograma_sev <- hist(severity,
                       main = "Gráfica Nº1: Distribución del índice de severidad
                       de accidentes mineros (MSHA)",
                       xlab = "INJURY_SEVERITY_SCORE",
                       ylab = "Cantidad de accidentes",
                       col = "gray")

lis <- histograma_sev$breaks[-length(histograma_sev$breaks)]
lss <- histograma_sev$breaks[-1]
MC_f <- histograma_sev$mids
ni_f <- histograma_sev$counts
hi_f <- (ni_f / sum(ni_f)) * 100
TDFlat_f <- round(data.frame(
  lis, lss, MC_f, ni_f, hi_f
), 2)

fila_total_f <- data.frame(
  lis = "TOTAL",
  lss = "",
  MC_f = "",
  ni_f = sum(ni_f),
  hi_f = round(sum(hi_f), 2)
)

TDFlat_t <- rbind(TDFlat_f, fila_total_f)
tabla_sev <- TDFlat_t %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nº2*"),
    subtitle = md("Distribución del índice de severidad de accidentes mineros")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Junert Núñez")
  )

tabla_sev
Tabla Nº2
Distribución del índice de severidad de accidentes mineros
lis lss MC_f ni_f hi_f
10 15 12.5 2 0.04
15 20 17.5 4 0.08
20 25 22.5 22 0.45
25 30 27.5 72 1.46
30 35 32.5 216 4.39
35 40 37.5 422 8.57
40 45 42.5 724 14.70
45 50 47.5 943 19.15
50 55 52.5 959 19.47
55 60 57.5 766 15.55
60 65 62.5 461 9.36
65 70 67.5 223 4.53
70 75 72.5 86 1.75
75 80 77.5 22 0.45
80 85 82.5 2 0.04
85 90 87.5 1 0.02
TOTAL 4925 100.00
Autor: Junert Núñez
u_1 <- mean(severity)
sigma_1 <- sd(severity)
n1 <- length(severity)
hist(severity,
     freq = FALSE,
     breaks = 20,
     main = "Gráfica Nº2: Ajuste del modelo normal al índice de severidad",
     xlab = "INJURY_SEVERITY_SCORE",
     col = "lightgray",
     border = "black")

x <- seq(min(severity), max(severity), 0.01)
curve(dnorm(x, u_1, sigma_1), col = "blue", lwd = 2, add = TRUE)

Fo_1 <- histograma_sev$counts
P1 <- numeric(length(Fo_1))

for (i in 1:length(Fo_1)) {
  P1[i] <- pnorm(histograma_sev$breaks[i+1], u_1, sigma_1) -
           pnorm(histograma_sev$breaks[i], u_1, sigma_1)
}

Fe_1 <- P1 * n1

Fo_1 <- (Fo_1 / n1) * 100
Fe_1 <- (Fe_1 / n1) * 100
plot(Fo_1, Fe_1,
     main = "Gráfica Nº3: Correlación de frecuencias observadas y esperadas",
     xlab = "Frecuencia observada (%)",
     ylab = "Frecuencia esperada (%)",
     col = "blue3")

abline(lm(Fe_1 ~ Fo_1), col = "red", lwd = 2)

CorrelaciOn_1 <- cor(Fo_1, Fe_1) * 100
x2_1 <- sum((Fe_1 - Fo_1)^2 / Fe_1)
grados_libertad_1 <- length(Fo_1) - 1
umbral_aceptacion_1 <- qchisq(0.95, grados_libertad_1)
x2_1 < umbral_aceptacion_1
## [1] TRUE
Variable <- c("INJURY_SEVERITY_SCORE")

tabla_resumen <- data.frame(
  Variable,
  round(CorrelaciOn_1, 2),
  round(x2_1, 2),
  round(umbral_aceptacion_1, 2)
)

colnames(tabla_resumen) <- c(
  "Variable",
  "Test Pearson (%)",
  "Chi Cuadrado",
  "Umbral de aceptación"
)

kable(tabla_resumen,
      format = "markdown",
      caption = "Tabla resumen del test de bondad de ajuste al modelo normal")
Tabla resumen del test de bondad de ajuste al modelo normal
Variable Test Pearson (%) Chi Cuadrado Umbral de aceptación
INJURY_SEVERITY_SCORE 99.98 0.13 25
Probabilidad_1 <- (pnorm(60, u_1, sigma_1) -
                   pnorm(40, u_1, sigma_1)) * 100
Probabilidad_1
## [1] 68.9316
plot(x, dnorm(x, u_1, sigma_1),
     col = "skyblue3",
     lwd = 2,
     main = "Gráfica Nº4: Cálculo de probabilidades",
     ylab = "Densidad de probabilidad",
     xlab = "INJURY_SEVERITY_SCORE")

x_section <- seq(40, 60, 0.01)
y_section <- dnorm(x_section, u_1, sigma_1)

lines(x_section, y_section, col = "red", lwd = 2)

polygon(c(x_section, rev(x_section)),
        c(y_section, rep(0, length(y_section))),
        col = rgb(1, 0, 0, 0.6))

legend("topright",
       legend = c("Modelo Normal", "Área de Probabilidad"),
       col = c("skyblue3", "red"),
       lwd = 2,
       cex = 0.7)

text(min(x),
     max(dnorm(x, u_1, sigma_1)) * 0.9,
     paste0("Probabilidad = ",
            round(Probabilidad_1, 2), "%"),
     cex = 0.8,
     font = 2)

severity_2 <- severity[severity >= mean(severity)]
Histograma_2 <- hist(severity_2,
     freq = FALSE,
     breaks = 20,
     main = "Gráfica Nº5: Comparación de la realidad con el modelo normal
     del índice de severidad de accidentes (Modelo 2)",
     xlab = "INJURY_SEVERITY_SCORE",
     ylab = "Densidad de probabilidad",
     col = "lightgray",
     border = "black")
severity_ln <- log(severity_2)

u_2 <- mean(severity_ln)
sigma_2 <- sd(severity_ln)
x <- seq(min(severity_2), max(severity_2), 0.01)
curve(dlnorm(x, meanlog = u_2, sdlog = sigma_2),
      col = "darkgreen",
      lwd = 2,
      add = TRUE)

h2 <- length(Histograma_2$counts)
n2 <- length(severity_2)

Fo_2 <- Histograma_2$counts
P2 <- numeric(h2)

for (i in 1:h2) {
  P2[i] <- pnorm(Histograma_2$breaks[i+1], u_2, sigma_2) -
           pnorm(Histograma_2$breaks[i], u_2, sigma_2)
}

Fe_2 <- P2 * n2
Fo_2 <- (Fo_2 / n2) * 100
Fe_2 <- (Fe_2 / n2) * 100
plot(Fo_2, Fe_2,
     main = "Gráfica Nº6: Correlación de frecuencias
     (Modelo Normal 2 – Severidad)",
     xlab = "Frecuencia observada (%)",
     ylab = "Frecuencia esperada (%)",
     col = "blue3")

abline(lm(Fe_2 ~ Fo_2), col = "red", lwd = 2)

Correlacion_2 <- cor(Fo_2, Fe_2) * 100
## Warning in cor(Fo_2, Fe_2): the standard deviation is zero
Correlacion_2
## [1] NA
grados_libertad_2 <- length(Fo_2) - 1
nivel_significancia <- 0.99

x2_2 <- sum((Fe_2 - Fo_2)^2 / Fe_2)
umbral_aceptacion_2 <- qchisq(nivel_significancia, grados_libertad_2)

x2_2 < umbral_aceptacion_2
## [1] NA
Variable <- c("INJURY_SEVERITY_SCORE (Modelo 2)")

tabla_resumen_2 <- data.frame(
  Variable,
  round(Correlacion_2, 2),
  round(x2_2, 2),
  round(umbral_aceptacion_2, 2)
)

colnames(tabla_resumen_2) <- c(
  "Variable",
  "Test Pearson (%)",
  "Chi Cuadrado",
  "Umbral de aceptación"
)

kable(tabla_resumen_2,
      format = "markdown",
      caption = "Tabla resumen del test de bondad – Modelo Normal 2")
Tabla resumen del test de bondad – Modelo Normal 2
Variable Test Pearson (%) Chi Cuadrado Umbral de aceptación
INJURY_SEVERITY_SCORE (Modelo 2) NA NaN 33.41
Probabilidad_2 <- (pnorm(80, u_2, sigma_2) -
                   pnorm(60, u_2, sigma_2)) * 100
Probabilidad_2
## [1] 0
plot(x, dnorm(x, u_2, sigma_2),
     col = "skyblue3",
     lwd = 2,
     main = "Gráfica Nº7: Cálculo de probabilidades
     (Severidad alta)",
     ylab = "Densidad de probabilidad",
     xlab = "INJURY_SEVERITY_SCORE")

x_section_2 <- seq(60, 80, 0.01)
y_section_2 <- dnorm(x_section_2, u_2, sigma_2)

lines(x_section_2, y_section_2, col = "red", lwd = 2)

polygon(c(x_section_2, rev(x_section_2)),
        c(y_section_2, rep(0, length(y_section_2))),
        col = rgb(1, 0, 0, 0.6))

legend("topright",
       legend = c("Modelo Normal", "Área de Probabilidad"),
       col = c("skyblue3", "red"),
       lwd = 2,
       cex = 0.6)

text(min(x),
     max(dnorm(x, u_2, sigma_2)) * 0.9,
     paste0("Probabilidad = ",
            round(Probabilidad_2, 2), "%"),
     cex = 0.8,
     font = 2)

# Teorema del Límite Central
# Variable: INJURY_SEVERITY_SCORE

# Media aritmética
x <- mean(severity)
x
## [1] 50.22578
# Desviación estándar
sigma <- sd(severity)
sigma
## [1] 9.861413
# Tamaño muestral
n <- length(severity)
n
## [1] 4925
# Error estándar (sigma / sqrt(n))
e <- sigma / sqrt(n)
e
## [1] 0.1405193
# Intervalo de confianza aproximado al 95%
li <- x - 2 * e
li
## [1] 49.94474
ls <- x + 2 * e
ls
## [1] 50.50681
# Tabla resumen TLC
Variable <- "INJURY_SEVERITY_SCORE"

tabla_media <- data.frame(
  round(li, 2),
  Variable,
  round(ls, 2),
  round(e, 4)
)

colnames(tabla_media) <- c(
  "Límite inferior",
  "Media poblacional",
  "Límite superior",
  "Error estándar poblacional"
)

library(knitr)
kable(
  tabla_media,
  format = "markdown",
  caption = "Tabla Nro. 3: Intervalo de confianza para la media poblacional según el Teorema del Límite Central"
)
Tabla Nro. 3: Intervalo de confianza para la media poblacional según el Teorema del Límite Central
Límite inferior Media poblacional Límite superior Error estándar poblacional
49.94 INJURY_SEVERITY_SCORE 50.51 0.1405