# ================================
# CARGA DE LIBRERÍAS
# ================================
library(readxl)
library(dplyr)
library(knitr)
library(kableExtra)

# ================================
# CARGA DE DATOS
# ================================
ruta <- "C:/Users/juner/OneDrive/Desktop/r/5000_Datos (1).xlsx"
datos <- read_excel(ruta)

# ================================
# VARIABLE GRAVEDAD DE LESIÓN
# ================================
Injury <- as.character(datos$NATURE_INJURY)
Injury <- Injury[!is.na(Injury)]
Injury <- tolower(Injury)

Injury[grep("cut|bruise|abrasion|scratch", Injury)] <- "Leve"
Injury[grep("sprain|strain|twist", Injury)] <- "Moderada"
Injury[grep("fracture|break", Injury)] <- "Grave"
Injury[grep("amputation|crush|burn", Injury)] <- "Muy Grave"
Injury[grep("fatal|death", Injury)] <- "Fatal"

Injury <- factor(
  Injury,
  levels = c("Leve","Moderada","Grave","Muy Grave","Fatal"),
  ordered = TRUE
)

# ================================
# TABLA DE FRECUENCIAS
# ================================
ni <- table(Injury)
hi <- prop.table(ni) * 100

tabla_injury <- data.frame(
  Gravedad = names(ni),
  ni = as.numeric(ni),
  hi = as.numeric(hi),
  P = as.numeric(hi)
)

tabla_injury$Nivel_num <- 1:nrow(tabla_injury)

fila_total <- data.frame(
  Gravedad = "TOTAL",
  ni = sum(tabla_injury$ni),
  hi = sum(tabla_injury$hi),
  P = sum(tabla_injury$P),
  Nivel_num = NA
)

tabla_injury_f <- rbind(tabla_injury, fila_total)
tabla_injury_f
##    Gravedad   ni         hi          P Nivel_num
## 1      Leve 1391  38.026244  38.026244         1
## 2  Moderada 1365  37.315473  37.315473         2
## 3     Grave  664  18.151996  18.151996         3
## 4 Muy Grave  238   6.506288   6.506288         4
## 5     Fatal    0   0.000000   0.000000         5
## 6     TOTAL 3658 100.000000 100.000000        NA
# ================================
# GRÁFICA DE DISTRIBUCIÓN
# ================================
hi_global <- tabla_injury$P[tabla_injury$Gravedad != "TOTAL"]
niveles_num <- tabla_injury$Nivel_num[tabla_injury$Gravedad != "TOTAL"]

barplot(
  hi_global,
  main = "Gráfica N°1: Distribución de probabilidad de la gravedad de lesiones",
  xlab = "Nivel de gravedad",
  ylab = "Probabilidad (%)",
  col = "gray",
  names.arg = niveles_num,
  ylim = c(0, 100)
)

mtext("1=Leve   2=Moderada   3=Grave   4=Muy Grave   5=Fatal",
      side = 1, line = 4, cex = 0.8)

# ================================
# CONJETURA DEL MODELO
# ================================
n <- sum(tabla_injury$ni)
x <- tabla_injury$ni
X <- 1:length(x)

media_observada <- sum(X * x) / n
p <- media_observada / length(x)
q <- 1 - p

P_binomial <- dbinom(X, size = length(x), prob = p)

# ================================
# COMPARACIÓN Fo vs Fe
# ================================
Fo <- (tabla_injury$ni / n) * 100
Fe <- P_binomial * 100

barplot(rbind(Fo, Fe), beside = TRUE,
        col = c("skyblue", "blue"),
        names.arg = tabla_injury$Nivel_num,
        main = "Gráfica N°2: Comparación de la realidad con el modelo binomial",
        ylab = "Probabilidad (%)",
        xlab = "Nivel de gravedad",
        ylim = c(0, 100))

legend("topright", legend = c("Real", "Modelo"),
       fill = c("skyblue", "blue"), cex = 0.8)

# ================================
# TEST DE PEARSON
# ================================
plot(Fo, Fe,
     main = "Gráfica N°3: Correlación de frecuencias",
     xlab = "Frecuencia Observada (%)",
     ylab = "Frecuencia Esperada (%)",
     pch = 19,
     col = "darkblue")

abline(lm(Fe ~ Fo), col = "red", lwd = 2)

Correlacion <- cor(Fo, Fe) * 100

# ================================
# TEST DE CHI-CUADRADO
# ================================
gl <- length(x) - 1
x2 <- sum((Fo - Fe)^2 / Fe)
x2
## [1] 5.810064
vc <- qchisq(0.97, gl)
vc
## [1] 10.7119
x2<vc
## [1] TRUE
# ================================
# TABLA RESUMEN
# ================================
Variable <- c("Gravedad de lesión")

tabla_resumen <- data.frame(
  Variable,
  round(Correlacion,2),
  round(x2,2),
  round(vc,2)
)

colnames(tabla_resumen) <- c("Variable","Test Pearson (%)","Chi Cuadrado","Umbral de aceptación")

kable(tabla_resumen)
Variable Test Pearson (%) Chi Cuadrado Umbral de aceptación
Gravedad de lesión 96.16 5.81 10.71
# ================================
# PROBABILIDAD 
# ================================
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(1,1,
     paste("      Calculo de probabilidad 
           ¿Cúal es la probabilidad de que 
           ocurra una lesión GRAVE =",
           round(dbinom(3, size = length(x), prob = p) * 100, 2), "%"),
     cex = 1.6, font = 2)