ANÁLISIS ESTADÍSTICO

CARGA DE DATOS Y LIBRERÍAS

CARGA DE DATOS

# Load the data set.
# The CSV is read through its full path instead of calling setwd(), so the
# working directory of the session is left untouched.
# The file uses ";" as field separator and "." as decimal mark.
datos <- read.csv(
  file.path("~/UNI/ESTADISTICA", "Depositos_Sulfuro.csv"),
  header = TRUE,
  sep = ";",
  dec = "."
)

CARGA DE LIBRERÍAS

#Carga de librerias
library(countrycode)
library(gt)
library(dplyr)
library(knitr)

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD POR PAÍS

# Probability distribution table by country.
#   ni: absolute frequency (rows of `datos` per value of `country`)
#   hi: relative frequency, rounded to 4 decimals
#   P : probability in percent, rounded to 2 decimals

TablaPais <- as.data.frame(table(datos$country))
colnames(TablaPais) <- c("Pais", "ni")
TablaPais$hi <- round(TablaPais$ni / sum(TablaPais$ni), 4)
TablaPais$P <- round(TablaPais$hi * 100, 2)

# TOTAL row.
# The totals are computed from the exact (unrounded) relative frequencies with
# an explicit number of digits. Summing the already-rounded columns and then
# rounding to an integer (the previous `round(x, )` with an empty `digits`)
# only hid the accumulated rounding error.
total_pais <- data.frame(
  Pais = "Total",
  ni = sum(TablaPais$ni),
  hi = round(sum(TablaPais$ni / sum(TablaPais$ni)), 4),
  P  = round(sum(TablaPais$ni / sum(TablaPais$ni)) * 100, 2)
)

# Append the TOTAL row to build the final table
TablaPaisFinal <- rbind(TablaPais, total_pais)

# Show the table
TablaPaisFinal

##                  Pais   ni     hi      P
## 1           Argentina    2 0.0018   0.18
## 2             Armenia    4 0.0037   0.37
## 3           Australia   56 0.0514   5.14
## 4             Bolivia    1 0.0009   0.09
## 5              Brazil    3 0.0028   0.28
## 6              Canada  317 0.2908  29.08
## 7               Chile    2 0.0018   0.18
## 8               China   37 0.0339   3.39
## 9            Colombia    6 0.0055   0.55
## 10               Cuba   13 0.0119   1.19
## 11             Cyprus   18 0.0165   1.65
## 12 Dominican Republic    3 0.0028   0.28
## 13            Ecuador    2 0.0018   0.18
## 14              Egypt    1 0.0009   0.09
## 15            Eritrea    3 0.0028   0.28
## 16               Fiji    3 0.0028   0.28
## 17            Finland   12 0.0110   1.10
## 18             France    4 0.0037   0.37
## 19            Georgia    4 0.0037   0.37
## 20      Great Britain    1 0.0009   0.09
## 21          Guatemala    1 0.0009   0.09
## 22             Guyana    2 0.0018   0.18
## 23              India    2 0.0018   0.18
## 24          Indonesia    4 0.0037   0.37
## 25               Iran    2 0.0018   0.18
## 26            Ireland    1 0.0009   0.09
## 27              Japan   82 0.0752   7.52
## 28         Kazakhstan   46 0.0422   4.22
## 29             Mexico   16 0.0147   1.47
## 30           Mongolia    1 0.0009   0.09
## 31            Morocco    3 0.0028   0.28
## 32             Norway   49 0.0450   4.50
## 33               Oman    3 0.0028   0.28
## 34           Pakistan    1 0.0009   0.09
## 35               Peru    5 0.0046   0.46
## 36        Philippines   19 0.0174   1.74
## 37           Portugal   14 0.0128   1.28
## 38             Russia   90 0.0826   8.26
## 39       Saudi Arabia   21 0.0193   1.93
## 40              Spain   61 0.0560   5.60
## 41             Sweden   40 0.0367   3.67
## 42             Turkey   26 0.0239   2.39
## 43   Union of Myanmar    1 0.0009   0.09
## 44      United States  100 0.0917   9.17
## 45         Uzbekistan    5 0.0046   0.46
## 46          Venezuela    3 0.0028   0.28
## 47              Total 1090 1.0000 100.00

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD POR PAÍS

# Formatted gt version of the country table (Table N° 1), built step by step.

# Start from the plain data frame
tabla_pais_gt <- gt(TablaPaisFinal)

# Title and subtitle, written in markdown
tabla_pais_gt <- tab_header(
  tabla_pais_gt,
  title = md("**Tabla N° 1**"),
  subtitle = md("**Distribución de probabilidad de los Depósitos Masivos<br>
                     de Sulfuro Volcánicos por País**")
)

# Source note shown under the table
tabla_pais_gt <- tab_source_note(
  tabla_pais_gt,
  source_note = md("Autor: Grupo 2")
)

# Borders, horizontal rules and row striping
tabla_pais_gt <- tab_options(
  tabla_pais_gt,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black",
  row.striping.include_table_body = TRUE
)

# Bold text for the "Total" row
tabla_pais_gt <- tab_style(
  tabla_pais_gt,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Pais == "Total")
)

tabla_pais_gt

Pais	ni	hi	P
Tabla N° 1
Distribución de probabilidad de los Depósitos Masivos de Sulfuro Volcánicos por País
Argentina	2	0.0018	0.18
Armenia	4	0.0037	0.37
Australia	56	0.0514	5.14
Bolivia	1	0.0009	0.09
Brazil	3	0.0028	0.28
Canada	317	0.2908	29.08
Chile	2	0.0018	0.18
China	37	0.0339	3.39
Colombia	6	0.0055	0.55
Cuba	13	0.0119	1.19
Cyprus	18	0.0165	1.65
Dominican Republic	3	0.0028	0.28
Ecuador	2	0.0018	0.18
Egypt	1	0.0009	0.09
Eritrea	3	0.0028	0.28
Fiji	3	0.0028	0.28
Finland	12	0.0110	1.10
France	4	0.0037	0.37
Georgia	4	0.0037	0.37
Great Britain	1	0.0009	0.09
Guatemala	1	0.0009	0.09
Guyana	2	0.0018	0.18
India	2	0.0018	0.18
Indonesia	4	0.0037	0.37
Iran	2	0.0018	0.18
Ireland	1	0.0009	0.09
Japan	82	0.0752	7.52
Kazakhstan	46	0.0422	4.22
Mexico	16	0.0147	1.47
Mongolia	1	0.0009	0.09
Morocco	3	0.0028	0.28
Norway	49	0.0450	4.50
Oman	3	0.0028	0.28
Pakistan	1	0.0009	0.09
Peru	5	0.0046	0.46
Philippines	19	0.0174	1.74
Portugal	14	0.0128	1.28
Russia	90	0.0826	8.26
Saudi Arabia	21	0.0193	1.93
Spain	61	0.0560	5.60
Sweden	40	0.0367	3.67
Turkey	26	0.0239	2.39
Union of Myanmar	1	0.0009	0.09
United States	100	0.0917	9.17
Uzbekistan	5	0.0046	0.46
Venezuela	3	0.0028	0.28
Total	1090	1.0000	100.00
Autor: Grupo 2

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA

Debido a que la tabla presenta numerosos registros de países, se decidió agruparlos por continentes.

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA

# Probability distribution table by continent.

# Assign a continent to every country name.
# NOTE(review): countrycode() is documented to return NA for names it cannot
# match, and aggregate()'s formula interface drops NA groups by default, so an
# unmatched country would silently leave the totals - confirm no warning is
# raised for this data set.
TablaPais$Continente <- countrycode(
  TablaPais$Pais,
  origin = "country.name",
  destination = "continent"
)

# Add up the absolute frequencies per continent
TablaContinente <- aggregate(ni ~ Continente, data = TablaPais, sum)

# Relative frequency (4 decimals) and probability in percent (2 decimals)
TablaContinente$hi <- round(TablaContinente$ni / sum(TablaContinente$ni), 4)
TablaContinente$P <- round(TablaContinente$hi * 100, 2)

# TOTAL row.
# The totals are computed from the exact (unrounded) relative frequencies with
# an explicit number of digits. Summing the already-rounded columns and then
# rounding to an integer (the previous `round(x, )` with an empty `digits`)
# only hid the accumulated rounding error.
total_continente <- data.frame(
  Continente = "Total",
  ni = sum(TablaContinente$ni),
  hi = round(sum(TablaContinente$ni / sum(TablaContinente$ni)), 4),
  P  = round(sum(TablaContinente$ni / sum(TablaContinente$ni)) * 100, 2)
)

# Final table
TablaContinenteFinal <- rbind(TablaContinente, total_continente)

TablaContinenteFinal

##   Continente   ni     hi      P
## 1     Africa    7 0.0064   0.64
## 2   Americas  476 0.4367  43.67
## 3       Asia  276 0.2532  25.32
## 4     Europe  272 0.2495  24.95
## 5    Oceania   59 0.0541   5.41
## 6      Total 1090 1.0000 100.00

TABLA DE DISTRIBUCIÓN DE PROBABILIDAD AGRUPADA FINAL

# Formatted gt version of the continent table (Table N° 2), built step by step.

# Start from the plain data frame
tabla_continente_gt <- gt(TablaContinenteFinal)

# Title and subtitle, written in markdown
tabla_continente_gt <- tab_header(
  tabla_continente_gt,
  title = md("**Tabla N° 2**"),
  subtitle = md("**Distribución de probabilidad de los Depósitos Masivos<br>
                     de Sulfuro Volcánicos por continente**")
)

# Source note shown under the table
tabla_continente_gt <- tab_source_note(
  tabla_continente_gt,
  source_note = md("Autor: Grupo 2")
)

# Borders, horizontal rules and row striping
tabla_continente_gt <- tab_options(
  tabla_continente_gt,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black",
  row.striping.include_table_body = TRUE
)

# Bold text for the "Total" row
tabla_continente_gt <- tab_style(
  tabla_continente_gt,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Continente == "Total")
)

tabla_continente_gt

Continente	ni	hi	P
Tabla N° 2
Distribución de probabilidad de los Depósitos Masivos de Sulfuro Volcánicos por continente
Africa	7	0.0064	0.64
Americas	476	0.4367	43.67
Asia	276	0.2532	25.32
Europe	272	0.2495	24.95
Oceania	59	0.0541	5.41
Total	1090	1.0000	100.00
Autor: Grupo 2

GRÁFICAS DE DISTRIBUCIÓN DE PROBABILIDAD

Diagrama de barras

# Extract the percentages and the continent labels, leaving out the Total row.
# Rows are selected by label rather than by position: `1:(nrow(x) - 1)` counts
# backwards (1, 0) when only the Total row is present and would then plot the
# Total itself.
P_global <- TablaContinenteFinal$P[
  TablaContinenteFinal$Continente != "Total"
]

continentes <- TablaContinenteFinal$Continente[
  TablaContinenteFinal$Continente != "Total"
]

# Bar chart of the probability (%) per continent.
# The y axis is fixed to 0-100 so the bars read directly as percentages.
barplot(
  P_global,
  main = "Gráfica Nº1: Distribución de probabilidad de los Depósitos Masivos
de Sulfuros Volcánicos en Continentes",
  xlab = "Continente",
  ylab = "Probabilidad (%)",
  col = "blue",
  names.arg = continentes,
  cex.names = 1,
  ylim = c(0, 100)
)

CÁLCULO DE PROBABILIDAD

# Find the continent with the highest probability (the Total row is excluded)
tabla_sin_total <- TablaContinenteFinal[
  TablaContinenteFinal$Continente != "Total",
]

# Name and probability of the row where P is largest
continente_mayor <- with(tabla_sin_total, Continente[which.max(P)])
prob_mayor <- with(tabla_sin_total, P[which.max(P)])

# Explanatory text drawn on an empty canvas (no axes, no labels, no points)
plot(1, xlab = "", ylab = "", axes = FALSE, type = "n")

text(
  1, 1,
  labels = paste0(
    "Cálculo de probabilidad\n(Estimación general)\n\n",
    "¿Qué continente es más probable\n",
    "que concentre la mayor cantidad de\n",
    "depósitos masivos de sulfuros volcánicos?\n\n",
    "R: ", continente_mayor, "\n",
    "Probabilidad = ", prob_mayor, " (%)"
  ),
  font = 2,
  col = "black",
  cex = 1.4
)

Análisis inferencial de Depósitos Masivos de Sulfuro Volcánicos (Variable Pais/Continente)

Grupo 2

2025-12-23