library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(gt)
library(readxl)

datos <- read.csv("D:/ALTERACIONES EXTRAIDAS.csv")

df_material <- data.frame(material = toupper(trimws(datos$CALIFICADOR_MATERIAL)))

df_material$material <- case_when(
  # --- SANO ---
  df_material$material %in% c("SIN ALTERACIÓN") ~ "Sano",
  
  # --- DÉBILMENTE ALTERADO ---
  df_material$material %in% c("ALTERACIÓN LEVE", "SILICIFICACIÓN DÉBIL", 
                               "CLORITIZACIÓN LEVE", "PROPILITIZACIÓN LEVE") ~ "Débilmente alterado",
  
  # --- MODERADAMENTE ALTERADO ---
  df_material$material %in% c("ALTERACIÓN HIDROTERMAL", "ARGILIZACIÓN MODERADA", 
                               "OXIDACIÓN MODERADA", "SILICIFICACIÓN", "ARGILIZACIÓN", 
                               "SERICITIZACIÓN", "CLORITIZACIÓN", "CARBONATIZACIÓN", 
                               "OXIDACIÓN", "PIRITIZACIÓN", "EPIDOTIZACIÓN", "CAOLINIZACIÓN", 
                               "ALBITIZACIÓN", "PROPILITIZACIÓN", "HEMATITITIZACIÓN", 
                               "LIMONITITIZACIÓN", "DOLOMITIZACIÓN") ~ "Moderadamente alterado",
  
  # --- ALTAMENTE ALTERADO ---
  df_material$material %in% c("SERICITIZACIÓN INTENSA", "SILICIFICACIÓN INTENSA", 
                               "SKARNIFICACIÓN") ~ "Altamente alterado",
  
  # --- OTROS (Cualquier categoría no mapeada o celda vacía) ---
  TRUE ~ "Otros"
)

# Definición de orden incluyendo "Otros" al final
orden_material <- c(
  "Sano",
  "Débilmente alterado",
  "Moderadamente alterado",
  "Altamente alterado",
  "Otros"
)

df_material$material <- factor(
  df_material$material,
  levels = orden_material,
  ordered = TRUE
)

# =========================================================
# FRECUENCIAS
# =========================================================

ni <- table(df_material$material)

hi <- round(prop.table(ni), 4)

P <- round(hi * 100, 2)

tabla_finalmaterial <- data.frame(

  Nivel_Alteracion = names(ni),

  ni = as.numeric(ni),

  hi = as.numeric(hi),

  P = as.numeric(P)
)

# =========================================================
# FILA TOTAL
# =========================================================

fila_total <- data.frame(

  Nivel_Alteracion = "TOTAL",

  ni = sum(tabla_finalmaterial$ni),

  hi = sum(tabla_finalmaterial$hi),

  P = sum(tabla_finalmaterial$P)
)

tabla_finalmaterial <- rbind(
  tabla_finalmaterial,
  fila_total
)

tabla_finalmaterial
##         Nivel_Alteracion   ni     hi      P
## 1                   Sano  347 0.1388  13.88
## 2    Débilmente alterado  511 0.2044  20.44
## 3 Moderadamente alterado 1415 0.5660  56.60
## 4     Altamente alterado   90 0.0360   3.60
## 5                  Otros  137 0.0548   5.48
## 6                  TOTAL 2500 1.0000 100.00
# =========================================================
# TABLA GT
# =========================================================

tabla_material_gt <- tabla_finalmaterial %>%

  gt() %>%

  tab_header(

    title = md("**Tabla Nº1**"),

    subtitle = md(
      "Distribución ordinal del grado de alteración del material"
    )
  ) %>%

  tab_source_note(

    source_note = md("Autor: Grupo 2")
  ) %>%

  tab_options(

    table.border.top.color = "black",

    table.border.bottom.color = "black",

    heading.border.bottom.color = "black",

    heading.border.bottom.width = px(2),

    column_labels.border.top.color = "black",

    column_labels.border.bottom.color = "black",

    column_labels.border.bottom.width = px(2),

    table_body.hlines.color = "gray",

    table_body.border.bottom.color = "black",

    row.striping.include_table_body = TRUE
  ) %>%

  tab_style(

    style = cell_text(weight = "bold"),

    locations = cells_body(
      rows = Nivel_Alteracion == "TOTAL"
    )
  )

tabla_material_gt
Tabla Nº1
Distribución ordinal del grado de alteración del material
Nivel_Alteracion ni hi P
Sano 347 0.1388 13.88
Débilmente alterado 511 0.2044 20.44
Moderadamente alterado 1415 0.5660 56.60
Altamente alterado 90 0.0360 3.60
Otros 137 0.0548 5.48
TOTAL 2500 1.0000 100.00
Autor: Grupo 2
# =========================================================
# GRÁFICA DESCRIPTIVA
# =========================================================

P_global <- tabla_finalmaterial$P[
  tabla_finalmaterial$Nivel_Alteracion != "TOTAL"
]

barplot(

  P_global,

  main = "Gráfica Nº1: Distribución ordinal del grado\nde alteración del material",

  xlab = "Nivel de alteración",

  ylab = "Probabilidad (%)",

  col = "blue",

  names.arg = tabla_finalmaterial$Nivel_Alteracion[
    tabla_finalmaterial$Nivel_Alteracion != "TOTAL"
  ],

  las = 2,

  ylim = c(0,100)
)

# =========================================================
# TAMAÑO MUESTRAL
# =========================================================

n <- sum(
  tabla_finalmaterial$ni[
    tabla_finalmaterial$Nivel_Alteracion != "TOTAL"
  ]
)

n
## [1] 2500
# =========================================================
# FRECUENCIAS OBSERVADAS
# =========================================================

x <- tabla_finalmaterial$ni[
  tabla_finalmaterial$Nivel_Alteracion != "TOTAL"
]

x
## [1]  347  511 1415   90  137
# =========================================================
# ESCALA ORDINAL
# =========================================================

X <- 1:length(x)

X
## [1] 1 2 3 4 5
# =========================================================
# MEDIA OBSERVADA
# =========================================================

media_observada <- sum(X * x) / n

media_observada
## [1] 2.6636
# =========================================================
# PARÁMETROS
# =========================================================

p <- media_observada / length(x)

p
## [1] 0.53272
q <- 1 - p

q
## [1] 0.46728
# =========================================================
# MODELO BINOMIAL
# =========================================================

P_binomial <- dbinom(
  X,
  size = length(x),
  prob = p
)

P_binomial
## [1] 0.12699241 0.28955401 0.33010446 0.18816689 0.04290373
# =========================================================
# FRECUENCIAS OBSERVADAS Y ESPERADAS
# =========================================================

Fo <- (x / n) * 100

Fo
## [1] 13.88 20.44 56.60  3.60  5.48
Fe <- P_binomial * 100

Fe
## [1] 12.699241 28.955401 33.010446 18.816689  4.290373
barplot(

  rbind(Fo, Fe),

  beside = TRUE,

  col = c("skyblue", "blue"),

  names.arg = tabla_finalmaterial$Nivel_Alteracion[
    tabla_finalmaterial$Nivel_Alteracion != "TOTAL"
  ],

  main = "Gráfica Nº2: Comparación entre la realidad\ny el modelo binomial",

  ylab = "Probabilidad (%)",

  xlab = "Nivel de alteración",

  ylim = c(0,100),

  las = 2
)

legend(

  "topright",

  legend = c("Real", "Modelo"),

  fill = c("skyblue", "blue")
)

plot(

  Fo,
  Fe,

  xlim = c(0, max(Fo)),

  ylim = c(0, max(Fe)),

  main = "Gráfica Nº3: Correlación de frecuencias",

  xlab = "Frecuencia observada (%)",

  ylab = "Frecuencia esperada (%)",

  pch = 19,

  col = "darkblue"
)

abline(
  a = 0,
  b = 1,

  col = "red",

  lwd = 2
)

Correlacion <- cor(Fo, Fe) * 100

Correlacion
## [1] 76.42185
K <- length(x)

K
## [1] 5
gl <- K - 1

gl
## [1] 4
x2 <- sum((Fo - Fe)^2 / Fe)

x2
## [1] 32.10665
vc <- qchisq(0.99, gl)

vc
## [1] 13.2767
x2 < vc
## [1] FALSE
# =========================================================
# ELIMINAR TOTAL
# =========================================================

tabla_sin_total <- tabla_finalmaterial[
  tabla_finalmaterial$Nivel_Alteracion != "TOTAL",
]

# =========================================================
# PROBABILIDAD DE MATERIAL ALTAMENTE ALTERADO
# =========================================================

prob_altamente <- tabla_sin_total$P[
  tabla_sin_total$Nivel_Alteracion == "Altamente alterado"
]

prob_altamente
## [1] 3.6
# =========================================================
# GRÁFICO EXPLICATIVO
# =========================================================

plot(
  1,

  type = "n",

  axes = FALSE,

  xlab = "",

  ylab = ""
)

text(

  x = 1,
  y = 1,

  labels = paste(

    "Cálculo de probabilidad\n",
    "(Estimación general)\n\n",

    "¿Qué probabilidad existe de que un material\n",
    "analizado en Estados Unidos presente un\n",
    "grado de alteración ALTO?\n\n",

    "Probabilidad = ",
    prob_altamente,
    " (%)",

    sep = ""
  ),

  cex = 1.2,

  col = "black",

  font = 2
)

Variable <- c("Grado de alteración del material")

tabla_resumen <- data.frame(

  Variable,

  round(Correlacion,2),

  round(x2,2),

  round(vc,2)
)

colnames(tabla_resumen) <- c(

  "Variable",

  "Test Pearson (%)",

  "Chi Cuadrado",

  "Umbral de aceptación"
)

library(knitr)

kable(

  tabla_resumen,

  format = "markdown",

  caption = "Tabla Nº2: Resumen de bondad del modelo"
)
Tabla Nº2: Resumen de bondad del modelo
Variable Test Pearson (%) Chi Cuadrado Umbral de aceptación
Grado de alteración del material 76.42 32.11 13.28