CARGA DE DATOS
# Load the raw deposit records and the era classification table.
# NOTE(review): setwd() ties the script to one machine; it is kept so the
# relative file names below keep resolving exactly as before.
setwd("~/UNI/ESTADISTICA")
# The printed depage labels contain raw "\xb1" bytes (the Latin-1 code of the
# plus-minus sign), so the file encoding is declared to decode them properly.
datos <- read.csv(
  "Depositos_Sulfuro.csv",
  sep = ";", dec = ".", header = TRUE,
  fileEncoding = "latin1"
)
# Second table: read.csv2() with its decimal mark overridden to ".".
datos2 <- read.csv2("Clasificacion depage.csv", sep = ";", dec = ".", header = TRUE)
CARGA DE LIBRERIAS
#Carga de librerias
library(dplyr)
library(gt)
library(knitr)
TABLA DE DISTRIBUCIÓN DE PROBABILIDAD POR EDAD GEOLÓGICA
# FREQUENCY TABLE - GEOLOGICAL AGE VARIABLE
# One row per distinct depage label: absolute count (ni) and relative
# frequency in percent (hi).
edad <- datos$depage
ni <- table(edad)
hi <- 100 * prop.table(ni)
tabla_final <- data.frame(edad = names(ni), ni = as.numeric(ni), hi = as.numeric(hi))
# Closing row holding the column totals.
fila_total <- with(
  tabla_final,
  data.frame(edad = "TOTAL", ni = sum(ni), hi = sum(hi))
)
tabla_final_p <- rbind(tabla_final, fila_total)
tabla_final_p
## edad ni hi
## 1 Upper Devonian (378.3\xb139 - 364\xb115) 1 0.09174312
## 2 Upper Triassic (217\xb128, Re-Os) 1 0.09174312
## 3 Miocene-Pliocene (14\xb12 - 2.2) 1 0.09174312
## 4 3 0.27522936
## 5 Archean 114 10.45871560
## 6 Archean-Mid Proterozoic 1 0.09174312
## 7 Archean-Proterozoic 1 0.09174312
## 8 Cambrian 31 2.84403670
## 9 Cambrian-Ordovician 13 1.19266055
## 10 Cambrian-Silurian 3 0.27522936
## 11 Cambrian(?) 2 0.18348624
## 12 Carboniferous 35 3.21100917
## 13 Cretaceous 27 2.47706422
## 14 Cretaceous-Eocene 1 0.09174312
## 15 Cretaceous or Paleocene-Eocene 1 0.09174312
## 16 Devonian 27 2.47706422
## 17 Devonian-Carboniferous 6 0.55045872
## 18 Devonian-Mississippian 7 0.64220183
## 19 Devonian-Pennsylvanian 1 0.09174312
## 20 Devonian-Permian 1 0.09174312
## 21 Early- Middle Devonian (Emsian-Eifelian) 1 0.09174312
## 22 Early-Middle Devonian 2 0.18348624
## 23 Early-Middle Devonian (Emsian-Eifelian) 1 0.09174312
## 24 Early-Middle Devonian(Emsian-Eifelian) 1 0.09174312
## 25 Early Cambrian 9 0.82568807
## 26 Early Carboniferous 1 0.09174312
## 27 Early Carboniferous (Visean-Namurian) 4 0.36697248
## 28 Early Carboniferous (Visean-Namurian?) 1 0.09174312
## 29 Early Cretaceous 6 0.55045872
## 30 Early Devonian 5 0.45871560
## 31 Early Devonian (Emsian) 8 0.73394495
## 32 Early Ordovician 1 0.09174312
## 33 Early Proterozoic 1 0.09174312
## 34 Early Silurian 1 0.09174312
## 35 Early Silurian (Llandovery) 2 0.18348624
## 36 Early Silurian(Llandovery) 1 0.09174312
## 37 Early to Middle Cretaceous 1 0.09174312
## 38 Eocene 8 0.73394495
## 39 Eocene-Middle Miocene 1 0.09174312
## 40 Eocene-Pliocene 8 0.73394495
## 41 Jurassic 22 2.01834862
## 42 Jurassic-Early Cretaceous 1 0.09174312
## 43 Jurassic-Lower Cretaceous 4 0.36697248
## 44 Jurassic or Cretaceous 1 0.09174312
## 45 Late-Middle Triassic 1 0.09174312
## 46 Late Ordovician 1 0.09174312
## 47 Late Archean 27 2.47706422
## 48 Late Cambrian-Early Ordovician 1 0.09174312
## 49 Late Carboniferous 2 0.18348624
## 50 Late Cretaceous 1 0.09174312
## 51 Late Cretaceous (Turonian) 4 0.36697248
## 52 Late Devonian 3 0.27522936
## 53 Late Devonian (Frasnian) 3 0.27522936
## 54 Late Jurassic 1 0.09174312
## 55 Late Jurassic-Lower Cretaceous 4 0.36697248
## 56 Late Ordovician 6 0.55045872
## 57 Late Permian 2 0.18348624
## 58 Late Permian-Early Triassic 1 0.09174312
## 59 Late Precambrian 1 0.09174312
## 60 Late Proterozoic 1 0.09174312
## 61 Late Silurian 1 0.09174312
## 62 Late Silurian-Early Devonian 2 0.18348624
## 63 Late Silurian-Lower Devonian 1 0.09174312
## 64 Late Triassic 1 0.09174312
## 65 Late Triassic-Late Jurassic 1 0.09174312
## 66 Lower-Middle Carboniferous 1 0.09174312
## 67 Lower-Middle Devonian (Emsian-early Eifelian) 1 0.09174312
## 68 Lower-Middle Devonian (Emsian-Eifelian) 2 0.18348624
## 69 Lower-Middle Jurassic 1 0.09174312
## 70 Lower Cretaceous 14 1.28440367
## 71 Lower Devonian 1 0.09174312
## 72 Lower Devonian (407, U-Pb) 1 0.09174312
## 73 Lower Devonian (Emsian) 2 0.18348624
## 74 Lower Devonian (late Emsian) 2 0.18348624
## 75 Lower Jurassic 2 0.18348624
## 76 Lower or mid-Cretaceous 1 0.09174312
## 77 Lower Ordovician 7 0.64220183
## 78 Lower Paleocene-Upper Eocene 2 0.18348624
## 79 Lower Permian 1 0.09174312
## 80 Lower Permian-Late Carboniferous (292-283) 1 0.09174312
## 81 Lower Proterozoic 16 1.46788991
## 82 Lower Tertiary 5 0.45871560
## 83 Meso-Neoproterozoic 1 0.09174312
## 84 Mesoproterozoic 3 0.27522936
## 85 Mesoproterozoic (1216-1026, Sm-Nd) 1 0.09174312
## 86 Mesoproterozoic? (Riphean?) 1 0.09174312
## 87 Mesozoic 2 0.18348624
## 88 Mid Cretaceous 1 0.09174312
## 89 Middle-Late Devonian (Givetian-Frasnian) 1 0.09174312
## 90 Middle-Upper Devonian 3 0.27522936
## 91 Middle-Upper Devonian (Givetian-Frasnian) 6 0.55045872
## 92 Middle-Upper Jurassic 1 0.09174312
## 93 Middle Cambrian 5 0.45871560
## 94 Middle Devonian 23 2.11009174
## 95 Middle Devonian (Eifelian-Givetian) 12 1.10091743
## 96 Middle Devonian (Eifelian to early Givetian) 1 0.09174312
## 97 Middle Devonian (Eifelian) 13 1.19266055
## 98 Middle Devonian (Emsian-Eifelian) 1 0.09174312
## 99 Middle Devonian (Givetian) 4 0.36697248
## 100 Middle Devonian (Late Givetian) 1 0.09174312
## 101 Middle Devonian (lower Givetian) 1 0.09174312
## 102 Middle Devonian(Givetian) 2 0.18348624
## 103 Middle Jurassic (late Bajocian) 3 0.27522936
## 104 Middle Miocene 1 0.09174312
## 105 Middle Ordovician 2 0.18348624
## 106 Middle Proterozoic 2 0.18348624
## 107 Middle Proterozoic? 1 0.09174312
## 108 Miocene 39 3.57798165
## 109 Miocene? 1 0.09174312
## 110 Mississippian 4 0.36697248
## 111 Neoarchean-Paleoproterozoic (2.8-2.6 Ga) 1 0.09174312
## 112 Neoproterozoic 3 0.27522936
## 113 Neoproterozoic (976.4-802.3) 1 0.09174312
## 114 Neoproterozoic (Early Paleozoic?) 1 0.09174312
## 115 Neoproterozoic, Sinian 1 0.09174312
## 116 Oligocene 1 0.09174312
## 117 Ordovician 108 9.90825688
## 118 Ordovician-Silurian 5 0.45871560
## 119 Ordovician to Precambrian 1 0.09174312
## 120 Orodivician 1 0.09174312
## 121 Paleoproterozoic 2 0.18348624
## 122 Paleoproterozoic (1700-2000) 2 0.18348624
## 123 Paleozoic 29 2.66055046
## 124 Paleozoic-Mid Mesozoic 1 0.09174312
## 125 Paleozoic-Triassic 1 0.09174312
## 126 Pennsylvanian-Permian 1 0.09174312
## 127 Permian 9 0.82568807
## 128 Permian-Cretaceous 1 0.09174312
## 129 Permian-Triassic 2 0.18348624
## 130 Pliocene 1 0.09174312
## 131 pre-Eocene 1 0.09174312
## 132 Pre-Tertiary 1 0.09174312
## 133 Precambrian 2 0.18348624
## 134 Precambrian-Late Cambrian 2 0.18348624
## 135 Precambrian Z (570-800 Ma) 1 0.09174312
## 136 Proterozoic 154 14.12844037
## 137 Proterozoic or Ordovician 2 0.18348624
## 138 Silurian 39 3.57798165
## 139 Silurian-Devonian 2 0.18348624
## 140 Silurian to Carboniferous 1 0.09174312
## 141 Tertiary 1 0.09174312
## 142 Triassic 5 0.45871560
## 143 Triassic-Jurassic 1 0.09174312
## 144 Triassic or Jurassic 1 0.09174312
## 145 Upper-Devonian-Lower Carboniferous 1 0.09174312
## 146 Upper Carboniferous 1 0.09174312
## 147 Upper Cretaceous 29 2.66055046
## 148 Upper Devonian 2 0.18348624
## 149 Upper Devonian- Lower Carboniferous 1 0.09174312
## 150 Upper Devonian-Lower Carboniferou 2 0.18348624
## 151 Upper Devonian-Lower Carboniferous 27 2.47706422
## 152 Upper Devonian (Famennian) 2 0.18348624
## 153 Upper Devonian (Frasnian) 1 0.09174312
## 154 Upper Jurassic 1 0.09174312
## 155 Upper Jurassic-Lower Cretaceous 4 0.36697248
## 156 Upper Proterozoic 2 0.18348624
## 157 Upper Silurian 1 0.09174312
## 158 Upper Triassic 1 0.09174312
## 159 Upper Devonian (373\xb115) 1 0.09174312
## 160 TOTAL 1090 100.00000000
Debido a que la tabla presenta numerosos registros de edades geológicas, se decidió agruparlos por eras geológicas, convirtiendo la variable en una variable ordinal.
TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA
# GEOLOGICAL ERA VARIABLE
# Clean the free-text classification and convert it into an ordered factor
# whose levels run from the oldest era to the youngest.
Era <- as.character(datos2$Classificacion)
Era <- Era[!is.na(Era)]
# Leading/trailing blanks would push an otherwise valid label outside the
# factor levels, silently dropping it from every later count.
Era <- trimws(Era)
# Remove accents first so the patterns below only need the plain spelling.
Era <- chartr("áéíóúÁÉÍÓÚ", "aeiouAEIOU", Era)
# Match both the correct spelling and the "precambico" misspelling that the
# previous pattern targeted; an accented alternative can never match once
# chartr() has run, so it is not listed.
Era <- gsub("precambr?ico", "Precambrico", Era, ignore.case = TRUE)
Era <- gsub("paleozoico", "Paleozoico", Era, ignore.case = TRUE)
Era <- gsub("mesozoico", "Mesozoico", Era, ignore.case = TRUE)
Era <- gsub("cenozoico", "Cenozoico", Era, ignore.case = TRUE)
# Any label that is not one of these four levels becomes NA here.
Era <- factor(
  Era,
  levels = c("Precambrico", "Paleozoico", "Mesozoico", "Cenozoico"),
  ordered = TRUE
)
# FREQUENCY DISTRIBUTION TABLE - ERA
# Counts per era (ni) and relative frequency rounded to three decimals,
# expressed in percent. Columns hi and P carry the same values.
ni <- table(Era)
hi <- round(prop.table(ni), 3)
porcentaje <- as.numeric(hi) * 100
tabla_era <- data.frame(
  Era = names(ni),
  ni = as.numeric(ni),
  hi = porcentaje,
  P = porcentaje
)
# ORDINAL NUMERIC CODE
# Position of each era in the chronological list; unknown labels stay NA.
orden_eras <- c("Precambrico", "Paleozoico", "Mesozoico", "Cenozoico")
tabla_era$Era_num <- as.numeric(match(tabla_era$Era, orden_eras))
# Closing row with the column totals; the ordinal code does not apply to it.
fila_total2 <- with(
  tabla_era,
  data.frame(Era = "TOTAL", ni = sum(ni), hi = sum(hi), P = sum(P), Era_num = NA)
)
tabla_era_f <- rbind(tabla_era, fila_total2)
tabla_era_f
## Era ni hi P Era_num
## 1 Precambrico 343 31.6 31.6 1
## 2 Paleozoico 522 48.0 48.0 2
## 3 Mesozoico 149 13.7 13.7 3
## 4 Cenozoico 73 6.7 6.7 4
## 5 TOTAL 1087 100.0 100.0 NA
TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA FINAL
# Presentation version of the era table, built step by step with gt:
# header, source note and border/striping options.
tabla_era_gt <- gt(tabla_era_f)
tabla_era_gt <- tab_header(
  tabla_era_gt,
  title = md("**Tabla N° 1**"),
  subtitle = md("Distribución de probabilidad por era geológica en <br>
depósitos masivos de sulfuros volcánicos")
)
tabla_era_gt <- tab_source_note(
  tabla_era_gt,
  source_note = md("Autor: Grupo 2")
)
tabla_era_gt <- tab_options(
  tabla_era_gt,
  # Outer frame
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  # Heading separator
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  # Column label borders
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  # Body rows
  table_body.hlines.color = "gray",
  row.striping.include_table_body = TRUE
)
tabla_era_gt
| Tabla N° 1 | ||||
| Distribución de probabilidad por era geológica en depósitos masivos de sulfuros volcánicos |
||||
| Era | ni | hi | P | Era_num |
|---|---|---|---|---|
| Precambrico | 343 | 31.6 | 31.6 | 1 |
| Paleozoico | 522 | 48.0 | 48.0 | 2 |
| Mesozoico | 149 | 13.7 | 13.7 | 3 |
| Cenozoico | 73 | 6.7 | 6.7 | 4 |
| TOTAL | 1087 | 100.0 | 100.0 | NA |
| Autor: Grupo 2 | ||||
Diagrama de barras
# Bar chart of the observed percentages, one bar per ordinal era code.
# The mask excludes any row labelled "TOTAL" from the plotted values.
sin_total <- tabla_era$Era != "TOTAL"
hi_global <- tabla_era$P[sin_total]
eras_num <- tabla_era$Era_num[sin_total]
barplot(
  height = hi_global,
  names.arg = eras_num,
  col = "gray",
  ylim = c(0, 100),
  main = "Gráfica N°1: Distribución de probabilidad por era geológica
Depósitos masivos de sulfuros volcánicos",
  xlab = "Era geológica ",
  ylab = "Probabilidad "
)
# Key for the numeric codes, written below the x axis.
mtext("1=Precambrico 2=Paleozoico 3=Mesozoico 4=Cenozoico", side = 1, line = 4, cex = 0.8)
# MODEL CONJECTURE
# Fit a binomial model to the eras recoded as 0, 1, ..., k - 1.
# Total number of classified deposits.
n <- sum(tabla_era$ni)
n
## [1] 1087
# Observed count per era.
x <- tabla_era$ni
x
## [1] 343 522 149 73
# Number of categories, taken from the data instead of a hard-coded 4.
k <- length(x)
# Support of the binomial variable: one value per era, starting at 0.
X <- seq_len(k) - 1L
X
## [1] 0 1 2 3
# Degrees of freedom: k - 1, with a further 1 subtracted (presumably for the
# parameter p estimated from the data below).
gl <- k - 1 - 1
gl
## [1] 2
# Mean of the recoded variable, weighted by the observed counts.
media_observada <- sum(X * tabla_era$ni) / sum(tabla_era$ni)
media_observada
## [1] 0.9558418
# p = E(X) / size, where size is the number of binomial trials (k - 1),
# not the sample size n computed above.
p <- media_observada / (k - 1)
p
## [1] 0.3186139
# q is the complement of p.
q <- 1 - p
q
## [1] 0.6813861
# Binomial mass function: P(X = x) = C(size, x) * p^x * q^(size - x)
P_binomial <- dbinom(X, size = k - 1, prob = p)
P_binomial
## [1] 0.31635869 0.44378489 0.20751238 0.03234404
# Grouped bar chart: observed percentages (first row) next to the binomial
# model percentages (second row), one pair of bars per era code.
barplot(
  rbind(hi_global, P_binomial * 100),
  beside = TRUE,
  names.arg = tabla_era$Era_num,
  col = c("skyblue", "blue"),
  ylim = c(0, 100),
  main = "Gráfica N°2: Modelo de probabilidad Binomial de la era geologica",
  xlab = "Era geologica",
  ylab = "Probabilidad"
)
# Key for the numeric codes, written below the x axis.
mtext("1=Precambrico 2=Paleozoico 3=Mesozoico 4=Cenozoico", side = 1, line = 4, cex = 0.8)
legend(
  "topright",
  legend = c("Real", "Modelo"),
  fill = c("skyblue", "blue"),
  cex = 0.5
)
Test de Pearson
# PEARSON TEST
# Scatter plot of observed against model percentages, with the least-squares
# line through the points.
modelo_pct <- P_binomial * 100
plot(
  x = hi_global,
  y = modelo_pct,
  pch = 19,
  col = "darkblue",
  main = "Gráfica N°3: Correlación de frecuencias en el modelo Binomial
de la era geologica",
  xlab = "Frecuencia Observada",
  ylab = "Frecuencia Esperada"
)
abline(lm(modelo_pct ~ hi_global), col = "red", lwd = 2)
# Observed and expected relative frequencies, both as proportions.
Fo <- hi_global / 100
Fe <- P_binomial
# Pearson correlation between them, expressed in percent.
Correlación <- cor(Fo, Fe) * 100
Correlación
## [1] 96.34981
Test de Chi-Cuadrado
# CHI-SQUARE GOODNESS-OF-FIT TEST
# Pearson's statistic is defined on absolute frequencies. Computing it on
# proportions shrinks it by a factor equal to the sample size, which lets
# almost any model pass, so observed and expected counts are used here.
Oi <- tabla_era$ni
# Expected count per era under the fitted model.
Ei <- sum(Oi) * Fe
x2 <- sum(((Oi - Ei)^2) / Ei)
x2
# NOTE(review): for the counts printed earlier (343, 522, 149, 73) this is
# roughly 70; re-knit the report to refresh the printed value.
# Critical value at the 5% significance level with gl degrees of freedom.
vc <- qchisq(0.95, gl)
vc
## [1] 5.991465
# TRUE means the model is not rejected at the 5% level.
x2 < vc
TABLA DE RESUMEN
# SUMMARY TABLE
# One-row summary of the goodness-of-fit results; check.names = FALSE keeps
# the display headers exactly as written.
Variable <- c("Era geológica")
tabla_resumen <- data.frame(
  "Variable" = Variable,
  "Test Pearson (%)" = round(Correlación),
  "Chi Cuadrado" = round(x2, 2),
  "Umbral de aceptación" = round(vc, 2),
  check.names = FALSE
)
library(knitr)
kable(
  tabla_resumen,
  format = "markdown",
  caption = "Tabla Nº2: Resumen de test de bondad al modelo de probabilidad"
)
| Variable | Test Pearson (%) | Chi Cuadrado | Umbral de aceptación |
|---|---|---|---|
| Era geológica | 96 | 0.06 | 5.99 |
# WHAT IS THE PROBABILITY THAT A VOLCANIC SULFIDE DEPOSIT BELONGS TO THE
# PALEOZOIC ERA?
# The value 1 is the second point of the 0-based support used by the model.
prob_paleozoica <- dbinom(1, size = length(x) - 1, prob = p) * 100
prob_paleozoica
## [1] 44.37849
# Explanatory text panel: an empty plot used as a canvas for the message.
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
mensaje <- paste0(
  "Cálculo de probabilidad\n",
  "¿Cual es la probabilidad que un depósito\n",
  "masivo de sulfuro volcánico sea de la era\n",
  "Paleozoica?\n",
  "Probabilidad = ", round(prob_paleozoica, 2), " (%)"
)
text(x = 1, y = 1, labels = mensaje, cex = 1.4, col = "black", font = 2)