ANÁLISIS ESTADÍSTICO

CARGA DE DATOS Y LIBRERÍAS

CARGA DE DATOS

# Data loading
# Explicit paths are built instead of calling setwd(), so the script does not
# change the session's working directory as a side effect.
dir_datos <- "~/UNI/ESTADISTICA"

# Deposits table: ";" field separator, "." decimal mark.
# The printed depage labels contain raw "\xb1" bytes, i.e. a Latin-1 plus/minus
# sign that was never decoded.
# NOTE(review): fileEncoding = "latin1" assumes the whole file is
# Latin-1/Windows-1252 encoded -- confirm against the source file.
datos <- read.csv(
  file.path(dir_datos, "Depositos_Sulfuro.csv"),
  sep = ";", dec = ".", header = TRUE, fileEncoding = "latin1"
)

# Era classification table, same ";" / "." layout. read.csv() states that
# layout directly instead of overriding both defaults of read.csv2().
datos2 <- read.csv(
  file.path(dir_datos, "Clasificacion depage.csv"),
  sep = ";", dec = ".", header = TRUE
)

CARGA DE LIBRERÍAS

#Carga de librerias
library(dplyr)
library(gt)
library(knitr)

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD POR EDAD GEOLÓGICA

# FREQUENCY TABLE - GEOLOGICAL AGE VARIABLE
# Raw age label of every deposit
edad <- datos$depage

# Absolute count per distinct label and its share of the total, in percent
ni <- table(edad)
hi <- 100 * (ni / sum(ni))

# One row per distinct label
tabla_final <- data.frame(
  edad = names(ni),
  ni = as.numeric(ni),
  hi = as.numeric(hi)
)

# Closing row holding the column totals
fila_total <- with(
  tabla_final,
  data.frame(edad = "TOTAL", ni = sum(ni), hi = sum(hi))
)

# Full table (labels + totals), printed for inspection
tabla_final_p <- rbind(tabla_final, fila_total)
tabla_final_p
##                                              edad   ni           hi
## 1        Upper Devonian (378.3\xb139 - 364\xb115)    1   0.09174312
## 2               Upper Triassic (217\xb128, Re-Os)    1   0.09174312
## 3                Miocene-Pliocene (14\xb12 - 2.2)    1   0.09174312
## 4                                                    3   0.27522936
## 5                                         Archean  114  10.45871560
## 6                         Archean-Mid Proterozoic    1   0.09174312
## 7                             Archean-Proterozoic    1   0.09174312
## 8                                        Cambrian   31   2.84403670
## 9                             Cambrian-Ordovician   13   1.19266055
## 10                              Cambrian-Silurian    3   0.27522936
## 11                                    Cambrian(?)    2   0.18348624
## 12                                  Carboniferous   35   3.21100917
## 13                                     Cretaceous   27   2.47706422
## 14                              Cretaceous-Eocene    1   0.09174312
## 15                 Cretaceous or Paleocene-Eocene    1   0.09174312
## 16                                       Devonian   27   2.47706422
## 17                         Devonian-Carboniferous    6   0.55045872
## 18                         Devonian-Mississippian    7   0.64220183
## 19                         Devonian-Pennsylvanian    1   0.09174312
## 20                               Devonian-Permian    1   0.09174312
## 21       Early- Middle Devonian (Emsian-Eifelian)    1   0.09174312
## 22                          Early-Middle Devonian    2   0.18348624
## 23        Early-Middle Devonian (Emsian-Eifelian)    1   0.09174312
## 24         Early-Middle Devonian(Emsian-Eifelian)    1   0.09174312
## 25                                 Early Cambrian    9   0.82568807
## 26                            Early Carboniferous    1   0.09174312
## 27          Early Carboniferous (Visean-Namurian)    4   0.36697248
## 28         Early Carboniferous (Visean-Namurian?)    1   0.09174312
## 29                               Early Cretaceous    6   0.55045872
## 30                                 Early Devonian    5   0.45871560
## 31                        Early Devonian (Emsian)    8   0.73394495
## 32                               Early Ordovician    1   0.09174312
## 33                              Early Proterozoic    1   0.09174312
## 34                                 Early Silurian    1   0.09174312
## 35                    Early Silurian (Llandovery)    2   0.18348624
## 36                     Early Silurian(Llandovery)    1   0.09174312
## 37                     Early to Middle Cretaceous    1   0.09174312
## 38                                         Eocene    8   0.73394495
## 39                          Eocene-Middle Miocene    1   0.09174312
## 40                                Eocene-Pliocene    8   0.73394495
## 41                                       Jurassic   22   2.01834862
## 42                      Jurassic-Early Cretaceous    1   0.09174312
## 43                      Jurassic-Lower Cretaceous    4   0.36697248
## 44                         Jurassic or Cretaceous    1   0.09174312
## 45                           Late-Middle Triassic    1   0.09174312
## 46                               Late  Ordovician    1   0.09174312
## 47                                   Late Archean   27   2.47706422
## 48                 Late Cambrian-Early Ordovician    1   0.09174312
## 49                             Late Carboniferous    2   0.18348624
## 50                                Late Cretaceous    1   0.09174312
## 51                     Late Cretaceous (Turonian)    4   0.36697248
## 52                                  Late Devonian    3   0.27522936
## 53                       Late Devonian (Frasnian)    3   0.27522936
## 54                                  Late Jurassic    1   0.09174312
## 55                 Late Jurassic-Lower Cretaceous    4   0.36697248
## 56                                Late Ordovician    6   0.55045872
## 57                                   Late Permian    2   0.18348624
## 58                    Late Permian-Early Triassic    1   0.09174312
## 59                               Late Precambrian    1   0.09174312
## 60                               Late Proterozoic    1   0.09174312
## 61                                  Late Silurian    1   0.09174312
## 62                   Late Silurian-Early Devonian    2   0.18348624
## 63                   Late Silurian-Lower Devonian    1   0.09174312
## 64                                  Late Triassic    1   0.09174312
## 65                    Late Triassic-Late Jurassic    1   0.09174312
## 66                     Lower-Middle Carboniferous    1   0.09174312
## 67  Lower-Middle Devonian (Emsian-early Eifelian)    1   0.09174312
## 68        Lower-Middle Devonian (Emsian-Eifelian)    2   0.18348624
## 69                          Lower-Middle Jurassic    1   0.09174312
## 70                               Lower Cretaceous   14   1.28440367
## 71                                 Lower Devonian    1   0.09174312
## 72                     Lower Devonian (407, U-Pb)    1   0.09174312
## 73                        Lower Devonian (Emsian)    2   0.18348624
## 74                   Lower Devonian (late Emsian)    2   0.18348624
## 75                                 Lower Jurassic    2   0.18348624
## 76                        Lower or mid-Cretaceous    1   0.09174312
## 77                               Lower Ordovician    7   0.64220183
## 78                   Lower Paleocene-Upper Eocene    2   0.18348624
## 79                                  Lower Permian    1   0.09174312
## 80     Lower Permian-Late Carboniferous (292-283)    1   0.09174312
## 81                              Lower Proterozoic   16   1.46788991
## 82                                 Lower Tertiary    5   0.45871560
## 83                            Meso-Neoproterozoic    1   0.09174312
## 84                                Mesoproterozoic    3   0.27522936
## 85             Mesoproterozoic (1216-1026, Sm-Nd)    1   0.09174312
## 86                    Mesoproterozoic? (Riphean?)    1   0.09174312
## 87                                       Mesozoic    2   0.18348624
## 88                                 Mid Cretaceous    1   0.09174312
## 89       Middle-Late Devonian (Givetian-Frasnian)    1   0.09174312
## 90                          Middle-Upper Devonian    3   0.27522936
## 91      Middle-Upper Devonian (Givetian-Frasnian)    6   0.55045872
## 92                          Middle-Upper Jurassic    1   0.09174312
## 93                                Middle Cambrian    5   0.45871560
## 94                                Middle Devonian   23   2.11009174
## 95            Middle Devonian (Eifelian-Givetian)   12   1.10091743
## 96   Middle Devonian (Eifelian to early Givetian)    1   0.09174312
## 97                     Middle Devonian (Eifelian)   13   1.19266055
## 98              Middle Devonian (Emsian-Eifelian)    1   0.09174312
## 99                     Middle Devonian (Givetian)    4   0.36697248
## 100               Middle Devonian (Late Givetian)    1   0.09174312
## 101              Middle Devonian (lower Givetian)    1   0.09174312
## 102                     Middle Devonian(Givetian)    2   0.18348624
## 103               Middle Jurassic (late Bajocian)    3   0.27522936
## 104                                Middle Miocene    1   0.09174312
## 105                             Middle Ordovician    2   0.18348624
## 106                            Middle Proterozoic    2   0.18348624
## 107                           Middle Proterozoic?    1   0.09174312
## 108                                       Miocene   39   3.57798165
## 109                                      Miocene?    1   0.09174312
## 110                                 Mississippian    4   0.36697248
## 111      Neoarchean-Paleoproterozoic (2.8-2.6 Ga)    1   0.09174312
## 112                                Neoproterozoic    3   0.27522936
## 113                  Neoproterozoic (976.4-802.3)    1   0.09174312
## 114             Neoproterozoic (Early Paleozoic?)    1   0.09174312
## 115                        Neoproterozoic, Sinian    1   0.09174312
## 116                                     Oligocene    1   0.09174312
## 117                                    Ordovician  108   9.90825688
## 118                           Ordovician-Silurian    5   0.45871560
## 119                     Ordovician to Precambrian    1   0.09174312
## 120                                   Orodivician    1   0.09174312
## 121                              Paleoproterozoic    2   0.18348624
## 122                  Paleoproterozoic (1700-2000)    2   0.18348624
## 123                                     Paleozoic   29   2.66055046
## 124                        Paleozoic-Mid Mesozoic    1   0.09174312
## 125                            Paleozoic-Triassic    1   0.09174312
## 126                         Pennsylvanian-Permian    1   0.09174312
## 127                                       Permian    9   0.82568807
## 128                            Permian-Cretaceous    1   0.09174312
## 129                              Permian-Triassic    2   0.18348624
## 130                                      Pliocene    1   0.09174312
## 131                                    pre-Eocene    1   0.09174312
## 132                                  Pre-Tertiary    1   0.09174312
## 133                                   Precambrian    2   0.18348624
## 134                     Precambrian-Late Cambrian    2   0.18348624
## 135                    Precambrian Z (570-800 Ma)    1   0.09174312
## 136                                   Proterozoic  154  14.12844037
## 137                     Proterozoic or Ordovician    2   0.18348624
## 138                                      Silurian   39   3.57798165
## 139                             Silurian-Devonian    2   0.18348624
## 140                     Silurian to Carboniferous    1   0.09174312
## 141                                      Tertiary    1   0.09174312
## 142                                      Triassic    5   0.45871560
## 143                             Triassic-Jurassic    1   0.09174312
## 144                          Triassic or Jurassic    1   0.09174312
## 145            Upper-Devonian-Lower Carboniferous    1   0.09174312
## 146                           Upper Carboniferous    1   0.09174312
## 147                              Upper Cretaceous   29   2.66055046
## 148                                Upper Devonian    2   0.18348624
## 149           Upper Devonian- Lower Carboniferous    1   0.09174312
## 150             Upper Devonian-Lower Carboniferou    2   0.18348624
## 151            Upper Devonian-Lower Carboniferous   27   2.47706422
## 152                    Upper Devonian (Famennian)    2   0.18348624
## 153                     Upper Devonian (Frasnian)    1   0.09174312
## 154                                Upper Jurassic    1   0.09174312
## 155               Upper Jurassic-Lower Cretaceous    4   0.36697248
## 156                             Upper Proterozoic    2   0.18348624
## 157                                Upper Silurian    1   0.09174312
## 158                                Upper Triassic    1   0.09174312
## 159                    Upper Devonian (373\xb115)    1   0.09174312
## 160                                         TOTAL 1090 100.00000000

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA

Debido a que la tabla presenta numerosos registros de edades geológicas, se decidió agruparlos por eras geológicas, con lo que la variable se convierte en una variable ordinal.

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA

# GEOLOGICAL ERA VARIABLE
# Canonical era labels in the order that defines the ordinal scale
niveles_era <- c("Precambrico", "Paleozoico", "Mesozoico", "Cenozoico")

Era <- as.character(datos2$Classificacion)

# Drop missing entries
Era <- Era[!is.na(Era)]

# Surrounding blanks would keep a label from matching its level below
Era <- trimws(Era)

# Strip accents first, so the patterns below only deal with unaccented text
Era <- chartr("áéíóúÁÉÍÓÚ", "aeiouAEIOU", Era)

# Normalise capitalisation. For the Precambrian both the correct spelling and
# the "Precambico" variant (missing "r") are accepted; an accented alternative
# is unnecessary because accents were already removed above.
Era <- gsub("precambr?ico", "Precambrico", Era, ignore.case = TRUE)
Era <- gsub("paleozoico", "Paleozoico", Era, ignore.case = TRUE)
Era <- gsub("mesozoico", "Mesozoico", Era, ignore.case = TRUE)
Era <- gsub("cenozoico", "Cenozoico", Era, ignore.case = TRUE)

# Any label outside the four levels turns into NA inside the factor and drops
# out of the counts without notice, so report it. Empty strings are not
# reported.
no_reconocidas <- setdiff(unique(Era), c(niveles_era, ""))
if (length(no_reconocidas) > 0) {
  warning(
    "Clasificaciones no reconocidas (se excluyen de los conteos): ",
    paste(no_reconocidas, collapse = ", "),
    call. = FALSE
  )
}

# Ordered factor: Precambrico < Paleozoico < Mesozoico < Cenozoico
Era <- factor(Era, levels = niveles_era, ordered = TRUE)


# FREQUENCY DISTRIBUTION TABLE - ERA
# Count per era and relative frequency rounded to three decimals
ni <- table(Era)
hi <- round(ni / sum(ni), 3)

# hi is stored as a percentage
tabla_era <- data.frame(
  Era = names(ni),
  ni = as.numeric(ni),
  hi = 100 * as.numeric(hi)
)

# P is an exact copy of the hi column
tabla_era$P <- tabla_era$hi

# ORDINAL NUMERIC CODE
# Position of each era label in the fixed sequence below
# (1 = Precambrico ... 4 = Cenozoico); any other label gets NA.
tabla_era$Era_num <- as.numeric(
  match(
    tabla_era$Era,
    c("Precambrico", "Paleozoico", "Mesozoico", "Cenozoico")
  )
)

# Totals row: sums of the count and percentage columns; it carries no
# ordinal code, hence Era_num = NA
fila_total2 <- with(
  tabla_era,
  data.frame(
    Era = "TOTAL",
    ni = sum(ni),
    hi = sum(hi),
    P = sum(P),
    Era_num = NA
  )
)

# Era table with the totals row appended, printed for inspection
tabla_era_f <- rbind(tabla_era, fila_total2)
tabla_era_f
##           Era   ni    hi     P Era_num
## 1 Precambrico  343  31.6  31.6       1
## 2  Paleozoico  522  48.0  48.0       2
## 3   Mesozoico  149  13.7  13.7       3
## 4   Cenozoico   73   6.7   6.7       4
## 5       TOTAL 1087 100.0 100.0      NA

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA FINAL

# Presentation version of the era table, built step by step with gt

# Base gt object from the table that includes the totals row
tabla_era_gt <- gt(tabla_era_f)

# Title and two-line subtitle (markdown)
tabla_era_gt <- tab_header(
  tabla_era_gt,
  title = md("**Tabla N° 1**"),
  subtitle = md("Distribución de probabilidad por era geológica en <br>
                 depósitos masivos de sulfuros volcánicos")
)

# Source note shown under the table
tabla_era_gt <- tab_source_note(
  tabla_era_gt,
  source_note = md("Autor: Grupo 2")
)

# Black outer and heading rules, grey body rules, striped body rows
tabla_era_gt <- tab_options(
  tabla_era_gt,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  row.striping.include_table_body = TRUE
)

tabla_era_gt
Tabla N° 1
Distribución de probabilidad por era geológica en
depósitos masivos de sulfuros volcánicos
Era ni hi P Era_num
Precambrico 343 31.6 31.6 1
Paleozoico 522 48.0 48.0 2
Mesozoico 149 13.7 13.7 3
Cenozoico 73 6.7 6.7 4
TOTAL 1087 100.0 100.0 NA
Autor: Grupo 2

GRÁFICAS DE DISTRIBUCIÓN DE PROBABILIDAD

Diagrama de barras

# Percentages and ordinal codes of the eras, skipping any row labelled "TOTAL"
hi_global <- with(tabla_era, P[Era != "TOTAL"])
eras_num <- with(tabla_era, Era_num[Era != "TOTAL"])

# Bar chart of the percentage per era; bars are labelled with the ordinal code
barplot(
  height = hi_global,
  names.arg = eras_num,
  col = "gray",
  ylim = c(0, 100),
  xlab = "Era geológica ",
  ylab = "Probabilidad ",
  main = "Gráfica N°1: Distribución de probabilidad por era geológica
  Depósitos masivos de sulfuros volcánicos"
)

# Key for the numeric codes, written in the bottom margin
mtext(
  text = "1=Precambrico   2=Paleozoico   3=Mesozoico   4=Cenozoico",
  side = 1, line = 4, cex = 0.8
)

CONJETURA DEL MODELO

# MODEL CONJECTURE
# Fits a binomial model to the ordinal era codes 0, 1, ..., k - 1, taking p
# from the observed mean of those codes.

# Total number of classified deposits
n <- sum(tabla_era$ni)
n
## [1] 1087
# Observed count in each era
x <- tabla_era$ni
x
## [1] 343 522 149  73
# Values the model variable can take: one code per category, starting at 0
X <- seq_along(x) - 1L
X
## [1] 0 1 2 3
# Number of categories, taken from the data instead of being hard-coded
k <- length(x)

# Degrees of freedom: k - 1, minus one more for the estimated parameter p
gl <- k - 1 - 1
gl
## [1] 2
# Count-weighted mean of the ordinal codes
media_observada <- sum(X * x) / n
media_observada
## [1] 0.9558418
# p: for a binomial with `size` trials E(X) = size * p, and size is k - 1 here
p <- media_observada / (k - 1)
p
## [1] 0.3186139
# q: complement of p
q <- 1 - p
q
## [1] 0.6813861
# Apply the formula P(X = x) = C(n, x) * p^x * q^(n - x) to every code

P_binomial <- dbinom(X, size = k - 1, prob = p)
P_binomial
## [1] 0.31635869 0.44378489 0.20751238 0.03234404
# Side-by-side bars per era: observed percentage (first row) next to the
# percentage given by the binomial model (second row)
barplot(
  height = rbind(hi_global, P_binomial * 100),
  beside = TRUE,
  names.arg = tabla_era$Era_num,
  col = c("skyblue", "blue"),
  ylim = c(0, 100),
  xlab = "Era geologica",
  ylab = "Probabilidad",
  main = "Gráfica N°2: Modelo de probabilidad Binomial de la era geologica"
)

# Key for the numeric codes, written in the bottom margin
mtext(
  text = "1=Precambrico   2=Paleozoico   3=Mesozoico   4=Cenozoico",
  side = 1, line = 4, cex = 0.8
)

# Colour key: observed data vs. model
legend(
  "topright",
  legend = c("Real", "Modelo"),
  fill = c("skyblue", "blue"),
  cex = 0.5
)

TEST DE APROBACIÓN

Test de Pearson

# PEARSON TEST
# Observed and modelled frequencies expressed as proportions
Fo <- hi_global / 100
Fe <- P_binomial

# Scatter of observed vs. modelled percentages with the least-squares line
plot(
  x = hi_global,
  y = P_binomial * 100,
  pch = 19,
  col = "darkblue",
  xlab = "Frecuencia Observada",
  ylab = "Frecuencia Esperada",
  main = "Gráfica N°3: Correlación de frecuencias en el modelo Binomial \n     de la era geologica"
)
abline(lm(P_binomial * 100 ~ hi_global), col = "red", lwd = 2)

# Pearson correlation between observed and modelled frequencies, in percent
Correlación <- 100 * cor(Fo, Fe)
Correlación
## [1] 96.34981

Test de Chi-Cuadrado

# CHI-SQUARE TEST
# Pearson's statistic sum((O - E)^2 / E) follows a chi-square distribution only
# when O and E are counts. Computed from proportions it is divided by the
# sample size, which makes the comparison with the critical value meaningless.
observadas <- tabla_era$ni
esperadas <- sum(observadas) * P_binomial

x2 <- sum((observadas - esperadas)^2 / esperadas)
x2
# NOTE(review): the statistic is now on the count scale, so the value printed
# in earlier renders of this document no longer applies -- re-run to refresh.
# A hand calculation from the printed counts gives roughly 70; confirm.

# Critical value at the 5 % significance level with gl degrees of freedom
vc <- qchisq(0.95, gl)
vc
## [1] 5.991465
# TRUE means the model is not rejected at the 5 % level
x2 < vc
# NOTE(review): with x2 of roughly 70 this comparison is expected to be FALSE,
# i.e. the binomial model would be rejected -- confirm by re-running.

TABLA DE RESUMEN

# SUMMARY TABLE
# One-row summary of both goodness-of-fit checks, with display-ready headers
Variable <- "Era geológica"
tabla_resumen <- data.frame(
  "Variable" = Variable,
  "Test Pearson (%)" = round(Correlación),
  "Chi Cuadrado" = round(x2, 2),
  "Umbral de aceptación" = round(vc, 2),
  check.names = FALSE
)
library(knitr)
kable(
  tabla_resumen,
  format = "markdown",
  caption = "Tabla Nº2: Resumen de test de bondad al modelo de probabilidad"
)
Tabla Nº2: Resumen de test de bondad al modelo de probabilidad
Variable Test Pearson (%) Chi Cuadrado Umbral de aceptación
Era geológica 96 0.06 5.99

CÁLCULO DE PROBABILIDADES

# WHAT IS THE PROBABILITY THAT A VOLCANIC SULFIDE DEPOSIT BELONGS TO THE
# PALEOZOIC ERA?
# Model probability of the value 1, expressed in percent
100 * dbinom(1, size = length(x) - 1, prob = p)
## [1] 44.37849
# Explanatory text panel

# Empty canvas without axes or axis labels
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")

# Question and rounded answer, written in bold at the centre of the canvas
text(
  1, 1,
  labels = paste0(
    "Cálculo de probabilidad\n",
    "¿Cual es la probabilidad que un depósito\n",
    "masivo de sulfuro volcánico sea de la era\n",
    "Paleozoica?\n",
    "Probabilidad = ",
    round(100 * dbinom(1, size = length(x) - 1, prob = p), 2),
    " (%)"
  ),
  font = 2,
  col = "black",
  cex = 1.4
)

CONCLUSIÓN