library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)

datos <- read.csv("D:/dataset_CMIO_geologico.csv")

# =========================================================
# VARIABLE ORDINAL
# =========================================================

df_ley <- data.frame(
  nivel_ley = trimws(toupper(datos$Ley_Categoria))
)

# =========================================================
# ORDEN LÓGICO
# =========================================================

orden_ley <- c(
  "BAJA",
  "MEDIA",
  "ALTA",
  "MUY ALTA"
)

# =========================================================
# FACTOR ORDENADO
# =========================================================

df_ley$nivel_ley <- factor(
  df_ley$nivel_ley,
  levels = orden_ley,
  ordered = TRUE
)

# =========================================================
# FRECUENCIAS
# =========================================================

ni <- table(df_ley$nivel_ley)

hi <- round(prop.table(ni), 4)

P <- round(hi * 100, 2)

tabla_finalley <- data.frame(

  Nivel_Ley = names(ni),

  ni = as.numeric(ni),

  hi = as.numeric(hi),

  P = as.numeric(P)
)

# =========================================================
# FILA TOTAL
# =========================================================

fila_total <- data.frame(

  Nivel_Ley = "TOTAL",

  ni = sum(tabla_finalley$ni),

  hi = sum(tabla_finalley$hi),

  P = sum(tabla_finalley$P)
)

tabla_finalley <- rbind(
  tabla_finalley,
  fila_total
)

tabla_finalley
##   Nivel_Ley   ni     hi      P
## 1      BAJA 1024 0.4096  40.96
## 2     MEDIA  975 0.3900  39.00
## 3      ALTA  501 0.2004  20.04
## 4  MUY ALTA    0 0.0000   0.00
## 5     TOTAL 2500 1.0000 100.00
# =========================================================
# TABLA GT
# =========================================================

tabla_ley_gt <- tabla_finalley %>%

  gt() %>%

  tab_header(

    title = md("**Tabla Nº1**"),

    subtitle = md(
      "Distribución ordinal del nivel de ley mineral"
    )
  ) %>%

  tab_source_note(

    source_note = md("Autor: Grupo 2")
  ) %>%

  tab_options(

    table.border.top.color = "black",

    table.border.bottom.color = "black",

    heading.border.bottom.color = "black",

    heading.border.bottom.width = px(2),

    column_labels.border.top.color = "black",

    column_labels.border.bottom.color = "black",

    column_labels.border.bottom.width = px(2),

    table_body.hlines.color = "gray",

    table_body.border.bottom.color = "black",

    row.striping.include_table_body = TRUE
  ) %>%

  tab_style(

    style = cell_text(weight = "bold"),

    locations = cells_body(
      rows = Nivel_Ley == "TOTAL"
    )
  )

tabla_ley_gt
Tabla Nº1
Distribución ordinal del nivel de ley mineral
Nivel_Ley ni hi P
BAJA 1024 0.4096 40.96
MEDIA 975 0.3900 39.00
ALTA 501 0.2004 20.04
MUY ALTA 0 0.0000 0.00
TOTAL 2500 1.0000 100.00
Autor: Grupo 2
# =========================================================
# TAMAÑO MUESTRAL
# =========================================================

n <- sum(
  tabla_finalley$ni[
    tabla_finalley$Nivel_Ley != "TOTAL"
  ]
)

n
## [1] 2500
# =========================================================
# FRECUENCIAS OBSERVADAS
# =========================================================

x <- tabla_finalley$ni[
  tabla_finalley$Nivel_Ley != "TOTAL"
]

x
## [1] 1024  975  501    0
# =========================================================
# ESCALA ORDINAL
# =========================================================

X <- 1:length(x)

X
## [1] 1 2 3 4
# =========================================================
# MEDIA OBSERVADA
# =========================================================

media_observada <- sum(X * x) / n

media_observada
## [1] 1.7908
# =========================================================
# PARÁMETROS
# =========================================================

p <- media_observada / length(x)

p
## [1] 0.4477
q <- 1 - p

q
## [1] 0.5523
# =========================================================
# MODELO BINOMIAL
# =========================================================

P_binomial <- dbinom(
  X,
  size = length(x),
  prob = p
)

P_binomial
## [1] 0.30169785 0.36683902 0.19824230 0.04017431
# =========================================================
# FRECUENCIAS
# =========================================================

Fo <- (x / n) * 100

Fo
## [1] 40.96 39.00 20.04  0.00
Fe <- P_binomial * 100

Fe
## [1] 30.169785 36.683902 19.824230  4.017431
barplot(

  rbind(Fo, Fe),

  beside = TRUE,

  col = c("skyblue", "blue"),

  names.arg = tabla_finalley$Nivel_Ley[
    tabla_finalley$Nivel_Ley != "TOTAL"
  ],

  main = "Gráfica Nº2: Comparación entre la realidad\ny el modelo binomial del nivel de ley",

  ylab = "Probabilidad (%)",

  xlab = "Nivel de ley mineral",

  ylim = c(0,100)
)

legend(

  "topright",

  legend = c("Real", "Modelo"),

  fill = c("skyblue", "blue")
)

plot(

  Fo,
  Fe,

  xlim = c(0, max(Fo)),

  ylim = c(0, max(Fe)),

  main = "Gráfica Nº3: Correlación de frecuencias",

  xlab = "Frecuencia observada (%)",

  ylab = "Frecuencia esperada (%)",

  pch = 19,

  col = "darkblue"
)

abline(
  a = 0,
  b = 1,

  col = "red",

  lwd = 2
)

Correlacion <- cor(Fo, Fe) * 100

Correlacion
## [1] 97.30673
K <- length(x)

K
## [1] 4
gl <- K - 1

gl
## [1] 3
x2 <- sum((Fo - Fe)^2 / Fe)

x2
## [1] 8.025127
vc <- qchisq(0.99, gl)

vc
## [1] 11.34487
x2 < vc
## [1] TRUE
# =========================================================
# ELIMINAR TOTAL
# =========================================================

tabla_sin_total <- tabla_finalley[
  tabla_finalley$Nivel_Ley != "TOTAL",
]

# =========================================================
# PROBABILIDAD DE NIVEL MUY ALTO
# =========================================================

prob_muy_alta <- tabla_sin_total$P[
  tabla_sin_total$Nivel_Ley == "MUY ALTA"
]

prob_muy_alta
## [1] 0
# =========================================================
# GRÁFICO EXPLICATIVO
# =========================================================

plot(
  1,

  type = "n",

  axes = FALSE,

  xlab = "",

  ylab = ""
)

text(

  x = 1,
  y = 1,

  labels = paste(

    "Cálculo de probabilidad\n",
    "(Estimación general)\n\n",

    "¿Qué probabilidad existe de que un depósito\n",
    "mineral analizado en Estados Unidos presente\n",
    "un nivel de ley MUY ALTO?\n\n",

    "Probabilidad = ",
    prob_muy_alta,
    " (%)",

    sep = ""
  ),

  cex = 1.2,

  col = "black",

  font = 2
)

Variable <- c("Nivel de ley mineral")

tabla_resumen <- data.frame(

  Variable,

  round(Correlacion,2),

  round(x2,2),

  round(vc,2)
)

colnames(tabla_resumen) <- c(

  "Variable",

  "Test Pearson (%)",

  "Chi Cuadrado",

  "Umbral de aceptación"
)

library(knitr)

kable(

  tabla_resumen,

  format = "markdown",

  caption = "Tabla Nº2: Resumen de bondad del modelo"
)
Tabla Nº2: Resumen de bondad del modelo
Variable Test Pearson (%) Chi Cuadrado Umbral de aceptación
Nivel de ley mineral 97.31 8.03 11.34