ANÁLISIS INFERENCIAL

CARGA DE DATOS Y LIBRERÍAS

#Limpiar entorno
rm(list = ls())

#Cargar librerías
if (!require("readr")) install.packages("readr")
if (!require("dplyr")) install.packages("dplyr")
if (!require("knitr")) install.packages("knitr")
if (!require("moments")) install.packages("moments")

library(readr)
library(dplyr)
library(knitr)
library(moments)
library(gt)

#Cargar datos
ruta <- "D:/SIO2_with_depth.csv"
datos <- read_csv(ruta)

#====================================================================
# LIMPIEZA DE LA VARIABLE
#====================================================================

profundidad <- as.numeric(datos$TOP_DEPTH_M)

profundidad <- na.omit(profundidad)

#====================================================================
# GRAFICA DE DISTRIBUCIÓN GENERAL
#====================================================================

histograma_prof <- hist(profundidad,
                        main = "Grafica Nº1: Distribución de cantidad de la profundidad de la muestra",
                        xlab = "Profundidad de la muestra (m)",
                        ylab = "Cantidad",
                        col = "gray")

#====================================================================
# TABLA DE DISTRIBUCIÓN DE FRECUENCIAS
#====================================================================

# LÍMITE INFERIOR

lis <- histograma_prof$breaks[1:length(histograma_prof$counts)]

# LÍMITE SUPERIOR

lss <- histograma_prof$breaks[2:(length(histograma_prof$counts)+1)]

# MARCA DE CLASE

MC_prof <- histograma_prof$mids

# FRECUENCIA ABSOLUTA

ni_prof <- histograma_prof$counts

# FRECUENCIA RELATIVA

hi_prof <- (ni_prof/sum(ni_prof))*100

# TABLA BASE

TDFprof <- round(data.frame(
  lis,
  lss,
  MC_prof,
  ni_prof,
  hi_prof
),2)

# FILA TOTAL

fila_total_prof <- data.frame(
  lis = "TOTAL",
  lss = "",
  MC_prof = "",
  ni_prof = sum(TDFprof$ni_prof),
  hi_prof = round(sum(TDFprof$hi_prof),2)
)

TDFprof_total <- rbind(TDFprof,
                       fila_total_prof)

# TABLA FINAL

tabla_profundidad <- TDFprof_total %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nº1*"),
    subtitle = md("Tabla de distribución de frecuencias de la profundidad de la muestra")
  ) %>%
  cols_label(
    lis = "Límite inferior",
    lss = "Límite superior",
    MC_prof = "Marca de clase",
    ni_prof = "Frecuencia absoluta",
    hi_prof = "Frecuencia relativa (%)"
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 2")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )

tabla_profundidad

Límite inferior	Límite superior	Marca de clase	Frecuencia absoluta	Frecuencia relativa (%)
Tabla Nº1
Tabla de distribución de frecuencias de la profundidad de la muestra
0	50	25	26	1.04
50	100	75	117	4.68
100	150	125	181	7.24
150	200	175	165	6.60
200	250	225	242	9.68
250	300	275	289	11.56
300	350	325	323	12.92
350	400	375	292	11.68
400	450	425	295	11.80
450	500	475	231	9.24
500	550	525	150	6.00
550	600	575	101	4.04
600	650	625	52	2.08
650	700	675	27	1.08
700	750	725	5	0.20
750	800	775	3	0.12
800	850	825	1	0.04
TOTAL			2500	100.00
Autor: Grupo 2

CONJETURA MODELO

#====================================================================
# HISTOGRAMA Y MODELO NORMAL
#====================================================================

prof_1 <- profundidad

Histograma_1 <- hist(prof_1,
                     freq = FALSE,
                     breaks = seq(0, 850, by = 75),
                     main = "Grafica Nº2: Comparación de la realidad con el modelo de probabilidad
                     normal de la profundidad de la muestra",
                     ylab = "Densidad de probabilidad",
                     xlab = "Profundidad de la muestra (m)",
                     col = "lightgray",
                     border = "black")

#====================================================================
# CALCULO DE PARAMETROS
#====================================================================

h1 <- length(Histograma_1$counts)

u_1 <- mean(prof_1)

sigma_1 <- sd(prof_1)

x <- seq(min(prof_1), max(prof_1), 0.01)

curve(dnorm(x, u_1, sigma_1),
      type = "l",
      col = "blue",
      add = TRUE)

#====================================================================
# TAMAÑO MUESTRAL
#====================================================================

n1 <- length(prof_1)

n1

## [1] 2500

#====================================================================
# FRECUENCIA OBSERVADA
#====================================================================

Fo_1 <- Histograma_1$counts

Fo_1

##  [1]  78 246 271 425 474 436 321 161  73  11   4

#====================================================================
# PROBABILIDAD
#====================================================================

P1 <- c(0)

for (i in 1:h1) {

  P1[i] <- (pnorm(Histograma_1$breaks[i+1],u_1,sigma_1)-
            pnorm(Histograma_1$breaks[i],u_1,sigma_1))
}

TEST DE APROBACIÓN

#====================================================================
# FRECUENCIA ESPERADA
#====================================================================

Fe_1 <- P1*n1

Fe_1

##  [1]  66.943907 163.001104 307.081277 447.650497 504.977277 440.818099
##  [7] 297.778295 155.649473  62.948114  19.694432   4.766092

#====================================================================
# TEST DE PEARSON
#====================================================================

#EXPRESAR FE Y FO EN PORCENTAJE

Fo_1 <- (Fo_1/n1)*100

Fo_1

##  [1]  3.12  9.84 10.84 17.00 18.96 17.44 12.84  6.44  2.92  0.44  0.16

Fe_1 <- (Fe_1/n1)*100

Fe_1

##  [1]  2.6777563  6.5200442 12.2832511 17.9060199 20.1990911 17.6327240
##  [7] 11.9111318  6.2259789  2.5179246  0.7877773  0.1906437

#====================================================================
# CORRELACIÓN DE FRECUENCIAS
#====================================================================

plot(Fo_1,
     Fe_1,
     main="Grafica Nº3: Correlación de frecuencias observadas y esperadas
     del modelo normal de la profundidad de la muestra",
     xlab="Frecuencia Observada (%)",
     ylab="Frecuencia Esperada (%)",
     col="blue3")

abline(a = 0,
       b = 1,
       col = "red",
       lwd = 2)

Correlacion_1 <- cor(Fo_1,Fe_1)*100

Correlacion_1

## [1] 98.51127

#====================================================================
# CHI CUADRADO
#====================================================================

grados_libertad_1 <- (length(Histograma_1$counts)-1)

grados_libertad_1

## [1] 10

nivel_significancia <- 0.95

x2_1 <- sum((Fe_1-Fo_1)^2/Fe_1)

x2_1

## [1] 2.35953

umbral_aceptacion_1 <- qchisq(nivel_significancia,
                              grados_libertad_1)

umbral_aceptacion_1

## [1] 18.30704

#====================================================================
# TABLA RESUMEN
#====================================================================

Variable <- c("Profundidad de la muestra")

tabla_resumen_1 <- data.frame(Variable,
                              round(Correlacion_1,2),
                              round(x2_1,2),
                              round(umbral_aceptacion_1,2))

colnames(tabla_resumen_1) <- c("Variable",
                               "Test Pearson (%)",
                               "Chi Cuadrado",
                               "Umbral de aceptación")

kable(tabla_resumen_1,
      format = "markdown",
      caption = "Tabla resumen del modelo normal")

Tabla resumen del modelo normal
Variable	Test Pearson (%)	Chi Cuadrado	Umbral de aceptación
Profundidad de la muestra	98.51	2.36	18.31

CALCULO DE PROBABILIDADES

#====================================================================
# PROBABILIDAD
#====================================================================

#¿Cuál es la probabilidad de que una muestra futura
#se encuentre entre 250 y 450 metros de profundidad?

Probabilidad_1 <- (pnorm(450, u_1, sigma_1) -
                   pnorm(250, u_1, sigma_1)) * 100

Probabilidad_1

## [1] 50.29246

#====================================================================
# GRAFICA DE PROBABILIDAD
#====================================================================

x <- seq(min(prof_1), max(prof_1), 0.01)

plot(x,
     dnorm(x, u_1,sigma_1),
     col = "skyblue3",
     lwd = 1,
     main="Grafica Nº4: Cálculo de probabilidades",
     ylab="Densidad de probabilidad",
     xlab="Profundidad de la muestra (m)")

# Definir rango
x_section <- seq(250,450, 0.001)

y_section <- dnorm(x_section, u_1,sigma_1)

# Pintar sección
lines(x_section,
      y_section,
      col = "red",
      lwd = 2)

polygon(c(x_section, rev(x_section)),
        c(y_section, rep(0, length(y_section))),
        col = rgb(1, 0, 0, 0.6))

# Leyenda
legend("topright",
       legend = c("Modelo Normal", "Área de Probabilidad"),
       col = c("skyblue3", "red"),
       lwd = 2,
       cex = 0.7)

# Texto
texto_prob <- paste0("Probabilidad = ",
                     round(Probabilidad_1, 2), " %")

text(x = 50,
     y = 0.0015,
     labels = texto_prob,
     col = "black",
     cex = 0.8,
     font = 2)

#====================================================================
# CANTIDAD ESPERADA
#====================================================================

#¿De 300 futuras muestras cuántas estarán
#entre 250 y 450 metros de profundidad?

cantidad_1 <- (pnorm(450, u_1, sigma_1) -
               pnorm(250, u_1, sigma_1)) * 300

cantidad_1

## [1] 150.8774

INTERVALOS DE CONFIANZA

#====================================================================
# INTERVALOS DE CONFIANZA
#====================================================================

#Media aritmética

x <- mean(profundidad)

x

## [1] 335.2504

#Desviación estándar

sigma <- sd(profundidad)

sigma

## [1] 146.5103

#Tamaño muestral

n <- length(profundidad)

n

## [1] 2500

#Error estándar

e <- sigma/sqrt(n)

e

## [1] 2.930206

#Intervalo de confianza

li <- x-2*e

li

## [1] 329.39

ls <- x+2*e

ls

## [1] 341.1108

tabla_media <- data.frame(round(li,2),
                          Variable,
                          round(ls,2),
                          round(e,2))

colnames(tabla_media) <- c("Limite inferior",
                           "Media poblacional",
                           "Limite superior",
                           "Error estándar")

kable(tabla_media,
      format = "markdown",
      caption = "Tabla Nro.2: Intervalo de confianza de la media poblacional")

Tabla Nro.2: Intervalo de confianza de la media poblacional
Limite inferior	Media poblacional	Limite superior	Error estándar
329.39	Profundidad de la muestra	341.11	2.93

CONCLUSION

"La profundidad de la muestra presenta un comportamiento aproximadamente
normal, evidenciado por la forma unimodal de la distribución y por la
cercanía existente entre la media aritmética y la mediana.

Los resultados obtenidos muestran un ajuste adecuado al modelo normal,
permitiendo aplicar herramientas de inferencia estadística para estimar
probabilidades relacionadas con la profundidad de muestreo en análisis
geoquímicos y geológicos.

De esta manera, fue posible calcular probabilidades de ocurrencia para
determinados intervalos de profundidad, así como estimar la media
poblacional mediante intervalos de confianza sustentados en el teorema
del límite central."

## [1] "La profundidad de la muestra presenta un comportamiento aproximadamente\nnormal, evidenciado por la forma unimodal de la distribución y por la\ncercanía existente entre la media aritmética y la mediana.\n\nLos resultados obtenidos muestran un ajuste adecuado al modelo normal,\npermitiendo aplicar herramientas de inferencia estadística para estimar\nprobabilidades relacionadas con la profundidad de muestreo en análisis\ngeoquímicos y geológicos.\n\nDe esta manera, fue posible calcular probabilidades de ocurrencia para\ndeterminados intervalos de profundidad, así como estimar la media\npoblacional mediante intervalos de confianza sustentados en el teorema\ndel límite central."

PROFUNDIDAD DE LA MUESTRA MODELO NORMAL

Grupo 2

2026-06-03

ANÁLISIS INFERENCIAL

CARGA DE DATOS Y LIBRERÍAS

CONJETURA MODELO

TEST DE APROBACIÓN

CALCULO DE PROBABILIDADES

INTERVALOS DE CONFIANZA

CONCLUSION