CARGA DE LIBRERÍAS

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

CARGA DE DATOS

ruta <- "C:/Users/juner/OneDrive/Desktop/r/5000_Datos (1).xlsx"
datos <- read_excel(ruta)

VARIABLE GRAVEDAD DE LESIÓN

Injury <- as.character(datos$NATURE_INJURY)
Injury <- Injury[!is.na(Injury)]
Injury <- tolower(Injury)

Injury[grep("cut|bruise|abrasion|scratch", Injury)] <- "Leve"
Injury[grep("sprain|strain|twist", Injury)] <- "Moderada"
Injury[grep("fracture|break", Injury)] <- "Grave"
Injury[grep("amputation|crush|burn", Injury)] <- "Muy Grave"
Injury[grep("fatal|death", Injury)] <- "Fatal"

Injury <- factor(Injury,
                 levels=c("Leve","Moderada","Grave","Muy Grave","Fatal"),
                 ordered=TRUE)

TABLA DE FRECUENCIAS

ni <- table(Injury)
hi <- prop.table(ni)*100

tabla_injury <- data.frame(
  Gravedad = names(ni),
  ni = as.numeric(ni),
  hi = as.numeric(hi),
  P = as.numeric(hi)
)

tabla_injury$Nivel_num <- 1:nrow(tabla_injury)
kable(tabla_injury)
Gravedad ni hi P Nivel_num
Leve 1391 38.026244 38.026244 1
Moderada 1365 37.315473 37.315473 2
Grave 664 18.151996 18.151996 3
Muy Grave 238 6.506288 6.506288 4
Fatal 0 0.000000 0.000000 5

GRÁFICA DE DISTRIBUCIÓN

barplot(tabla_injury$P,
        names.arg = tabla_injury$Nivel_num,
        col="gray",
        ylim=c(0,100),
        main="Gráfica N°1: Distribución de probabilidad",
        ylab="Probabilidad (%)")

MODELO BINOMIAL

n <- sum(tabla_injury$ni)
x <- tabla_injury$ni
X <- 1:length(x)

media_observada <- sum(X*x)/n
p <- media_observada/length(x)

P_binomial <- dbinom(X, size=length(x), prob=p)

COMPARACIÓN Fo vs Fe

Fo <- (tabla_injury$ni/n)*100
Fe <- P_binomial*100

barplot(rbind(Fo,Fe), beside=TRUE,
        col=c("skyblue","blue"),
        names.arg=tabla_injury$Nivel_num,
        main="Gráfica N°2: Real vs Binomial",
        ylab="Probabilidad (%)")

TEST DE PEARSON

Correlacion <- cor(Fo,Fe)*100
Correlacion
## [1] 96.16498

TEST DE CHI-CUADRADO

gl <- length(x)-1
x2 <- sum((Fo-Fe)^2/Fe)
vc <- qchisq(0.99, gl)

x2
## [1] 5.810064
vc
## [1] 13.2767
x2 < vc
## [1] TRUE

TABLA RESUMEN

tabla_resumen <- data.frame(
  Variable="Gravedad de lesión",
  Pearson=round(Correlacion,2),
  Chi2=round(x2,2),
  Umbral=round(vc,2)
)

kable(tabla_resumen)
Variable Pearson Chi2 Umbral
Gravedad de lesión 96.16 5.81 13.28