Se cargan las librerías necesarias y el dataset Global Oil and Gas Extraction Tracker (GOGET), que contiene registros de unidades de extracción de petróleo y gas a nivel mundial.
library(readxl)
library(dplyr)
library(gt)
library(ggplot2)
library(scales)
library(forcats)
# Load the GOGET workbook through an explicit path instead of calling setwd(),
# so reading the data does not change the session's working directory.
ruta_datos <- file.path("C:/Users/DETPC/Downloads", "dataset_mundial_petro.xlsx")
datos <- read_excel(ruta_datos)
# Country -> continent lookup table (named character vector).
# Names are the values looked up from the dataset's Country column; values are
# the Spanish continent labels. Multi-country keys such as "Iran-Iraq" get a
# single label, while "Türkiye" and "Russia" map to the combined label
# "Europa/Asia".
# NOTE(review): "Russia-Kazakhstan" is labelled "Asia" although "Russia" alone
# is "Europa/Asia" — confirm this is intended.
continente_map <- c(
"Austria"="Europa","Brazil"="América del Sur","Cyprus"="Europa","France"="Europa","Germany"="Europa",
"Hungary"="Europa","Ireland"="Europa","Israel"="Asia","Italy"="Europa","Netherlands"="Europa",
"Norway"="Europa","Poland"="Europa","Romania"="Europa","Spain"="Europa","Türkiye"="Europa/Asia",
"United Kingdom"="Europa","Mexico"="América del Norte","Colombia"="América del Sur","Ecuador"="América del Sur",
"Denmark"="Europa","Peru"="América del Sur","India"="Asia","Bolivia"="América del Sur","Venezuela"="América del Sur",
"Guatemala"="América del Norte","Cuba"="América del Norte","Thailand"="Asia","Thailand-Malaysia"="Asia",
"Canada"="América del Norte","Australia"="Oceanía","China"="Asia","Trinidad and Tobago"="América del Norte",
"Nigeria"="África","Egypt"="África","Angola"="África","Malaysia"="Asia","Iraq"="Asia","Russia"="Europa/Asia",
"Iran"="Asia","Iran-Iraq"="Asia","Saudi Arabia-Iran"="Asia","United Arab Emirates-Iran"="Asia",
"United Arab Emirates"="Asia","Kuwait"="Asia","Kuwait-Saudi Arabia"="Asia","Kuwait-Saudi Arabia-Iran"="Asia",
"Qatar"="Asia","Oman"="Asia","Kazakhstan"="Asia","Algeria"="África","Libya"="África","Indonesia"="Asia",
"Azerbaijan"="Asia","United States"="América del Norte","Pakistan"="Asia","Bangladesh"="Asia","Ghana"="África",
"Guyana"="América del Sur","Mauritania"="África","Myanmar"="Asia","South Africa"="África","Suriname"="América del Sur",
"Vietnam"="Asia","Mozambique"="África","Tanzania"="África","Senegal-Mauritania"="África","Senegal"="África",
"Uganda"="África","Kenya"="África","Brunei"="Asia","Russia-Kazakhstan"="Asia","Jamaica"="América del Norte",
"Grenada"="América del Norte","New Zealand"="Oceanía","Papua New Guinea"="Oceanía","Timor-Leste"="Asia",
"Saudi Arabia"="Asia","Argentina"="América del Sur","Republic of the Congo"="África","Ethiopia"="África",
"Côte d'Ivoire"="África","Namibia"="África","Gabon"="África","South Sudan"="África","Chad"="África",
"Morocco"="África","Ukraine"="Europa","China-Japan"="Asia","Cameroon"="África","Vietnam-Malaysia"="Asia",
"Albania"="Europa","Bahrain"="Asia","Japan"="Asia","Madagascar"="África","Philippines"="Asia",
"Saudi Arabia-Bahrain"="Asia","Tunisia"="África","Turkmenistan"="Asia","Palestine"="Asia","Barbados"="América del Norte",
"Chile"="América del Sur","Syria"="Asia","Timor Gap"="Asia","Zimbabwe"="África"
)
# Attach the continent label to every record and keep only rows whose country
# is present in continente_map (unmapped or missing countries yield NA).
# unname() stops the lookup's element names from leaking into the new column.
# is.na() alone is the correct missing-value test: no label in the map is the
# literal string "NA", so the former `Continente != "NA"` comparison never
# removed anything.
datos <- datos %>%
  mutate(Continente = unname(continente_map[Country])) %>%
  filter(!is.na(Continente))

# Report how many records survive the filter.
cat("Registros válidos:", nrow(datos), "\n")
## Registros válidos: 49212
## Variables: 33
Se extrae la variable Continente. Es una variable de escala nominal: sus categorías no tienen orden jerárquico intrínseco.
## Variable analizada: Continente
## Total de observaciones (n): 49212
## Categorías presentes: Europa, América del Sur, Asia, Europa/Asia, América del Norte, Oceanía, África
Se calcula la frecuencia absoluta (nᵢ), la frecuencia relativa en proporción (hᵢ) y en porcentaje (hᵢ %) para cada categoría, ordenadas de mayor a menor.
# Frequency table for Continente, sorted from most to least frequent:
#   ni       absolute frequency
#   hi_prop  relative frequency as a proportion
#   hi_pct   relative frequency as a percentage
#   i        row number after sorting
# The proportion is computed against sum(ni) rather than a free-standing `n`,
# so the table does not depend on a global defined elsewhere and the
# proportions always add up to 1.
tabla_freq <- datos %>%
  count(Continente, name = "ni") %>%
  arrange(desc(ni)) %>%
  mutate(
    hi_prop = ni / sum(ni),
    hi_pct = hi_prop * 100,
    i = row_number()
  ) %>%
  select(i, Continente, ni, hi_pct, hi_prop)

# Number of categories (rows) in the frequency table.
k <- nrow(tabla_freq)
cat("Número de categorías (k):", k, "\n")
## Número de categorías (k): 7
## Categoría más frecuente : América del Norte — 27405 registros
## Categoría menos frecuente: Oceanía — 526 registro(s)
## Verificación — Σnᵢ: 49212 (debe ser 49212 )
## Verificación — Σhᵢ%: 100 (debe ser 100)
# Table N. 1: frequency distribution by continent, rendered with gt.
# Number formats are applied first, then the header, column labels and the
# spanner over both relative-frequency columns, followed by the totals row
# and the visual styling.
gt(tabla_freq) %>%
  fmt_number(columns = ni, decimals = 0, use_seps = TRUE) %>%
  fmt_number(columns = hi_pct, decimals = 2) %>%
  fmt_number(columns = hi_prop, decimals = 4) %>%
  tab_header(
    title = md("**Tabla N. 1**"),
    subtitle = md("Distribución de frecuencias por continente — yacimientos de petróleo y gas")
  ) %>%
  cols_label(
    i = md("**N°**"),
    Continente = md("**Continente**"),
    ni = md("**nᵢ**"),
    hi_pct = md("**(%)** "),
    hi_prop = md("**(proporción)**")
  ) %>%
  tab_spanner(
    label = md("**hᵢ**"),
    columns = c(hi_pct, hi_prop)
  ) %>%
  # Totals row: column sums, formatted like the body cells.
  grand_summary_rows(
    columns = c(ni, hi_pct, hi_prop),
    fns = list(label = "Total", fn = "sum"),
    fmt = list(
      ~ fmt_number(., columns = ni, decimals = 0, use_seps = TRUE),
      ~ fmt_number(., columns = hi_pct, decimals = 2),
      ~ fmt_number(., columns = hi_prop, decimals = 4)
    )
  ) %>%
  tab_source_note("Autor: Grupo 5") %>%
  tab_options(
    table.width = pct(75),
    table.font.size = px(13),
    table.font.names = "Arial",
    table.border.top.color = "#AAAAAA",
    table.border.bottom.color = "#AAAAAA",
    heading.align = "center",
    heading.background.color = "#AAAAAA",
    heading.title.font.size = px(15),
    heading.subtitle.font.size = px(12),
    column_labels.font.weight = "bold",
    column_labels.background.color = "#FFFFFF",
    column_labels.border.top.color = "#AAAAAA",
    column_labels.border.bottom.color = "#AAAAAA"
  ) %>%
  # White bold text for the title and subtitle on the grey heading band.
  tab_style(
    style = cell_text(color = "white", weight = "bold"),
    locations = cells_title(groups = c("title", "subtitle"))
  ) %>%
  # Bold column labels, spanner and totals row.
  tab_style(
    style = cell_text(weight = "bold"),
    locations = list(
      cells_column_labels(),
      cells_column_spanners(),
      cells_grand_summary()
    )
  )
| Tabla N. 1 | |||||
| Distribución de frecuencias por continente — yacimientos de petróleo y gas | |||||
| | N° | Continente | nᵢ | hᵢ (%) | hᵢ (proporción) |
|---|---|---|---|---|---|
| | 1 | América del Norte | 27,405 | 55.69 | 0.5569 |
| | 2 | Europa | 7,351 | 14.94 | 0.1494 |
| | 3 | América del Sur | 6,079 | 12.35 | 0.1235 |
| | 4 | Asia | 3,734 | 7.59 | 0.0759 |
| | 5 | África | 2,197 | 4.46 | 0.0446 |
| | 6 | Europa/Asia | 1,920 | 3.90 | 0.0390 |
| | 7 | Oceanía | 526 | 1.07 | 0.0107 |
| Total | — | — | 49,212 | 100.00 | 1.0000 |
| Autor: Grupo 5 | |||||
# Plotting copy of the frequency table: Continente becomes a factor whose
# levels are ordered by ni, which determines the bar order on the x axis.
cont_graf <- mutate(tabla_freq, Continente = fct_reorder(Continente, ni))
# Fill colour for each continent label, as a named character vector
# (label -> hex colour).
colores <- setNames(
  c("#AED6F1", "#5DADE2", "#2E86C1", "#1A5276", "#154360", "#85C1E9",
    "#D4E6F1"),
  c("América del Norte", "Europa", "América del Sur", "Asia", "África",
    "Europa/Asia", "Oceanía")
)
# Shared theme for the vertical bar charts: no legend, horizontal grid lines
# only, and bold, slightly rotated x-axis labels on a white background.
tema_base <- theme_minimal(base_size = 12) +
  theme(
    plot.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(face = "bold", size = 13),
    plot.caption = element_text(color = "#888888", size = 9, hjust = 0),
    legend.position = "none",
    axis.title = element_text(face = "bold", size = 11),
    axis.text.x = element_text(face = "bold", angle = 15, hjust = 1),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "#EEEEEE")
  )
# Shared caption: total number of records plus the data source. The total is
# taken from the frequency table itself, so the caption does not rely on a
# separate global `n`.
pie_label <- paste0("n = ", format(sum(tabla_freq$ni), big.mark = ","),
                    " | Fuente: Global Energy Monitor — GOGET 2023")

# Chart N. 1: absolute frequency per continent (vertical bars).
# trim = TRUE stops format() from left-padding the shorter counts to a common
# width; the padding would shift those labels off the centre of their bars.
ggplot(cont_graf, aes(x = Continente, y = ni, fill = Continente)) +
  geom_col(width = 0.55, color = "white") +
  geom_text(aes(label = format(ni, big.mark = ",", trim = TRUE)),
            vjust = -0.4, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = colores) +
  scale_y_continuous(labels = label_comma(),
                     expand = expansion(mult = c(0, 0.12))) +
  labs(title = "Gráfica N. 1: Distribución de yacimientos por continente",
       x = "Continente", y = "Frecuencia Absoluta (nᵢ)",
       caption = pie_label) +
  tema_base

# Chart N. 2: relative frequency (%) per continent (vertical bars).
# sprintf() always prints two decimals, so a value such as 3.90 is not
# shortened to "3.9%".
ggplot(cont_graf, aes(x = Continente, y = hi_pct, fill = Continente)) +
  geom_col(width = 0.55, color = "white") +
  geom_text(aes(label = sprintf("%.2f%%", hi_pct)),
            vjust = -0.4, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = colores) +
  scale_y_continuous(labels = function(x) paste0(x, "%"),
                     expand = expansion(mult = c(0, 0.12))) +
  labs(title = "Gráfica N. 2: Distribución porcentual por continente",
       x = "Continente", y = "Frecuencia Relativa (%)",
       caption = pie_label) +
  tema_base

# Chart N. 3: pie chart of the percentage distribution, built as a single
# stacked bar drawn in polar coordinates. Labels use a fixed one-decimal
# format.
tabla_freq %>%
  mutate(Continente = fct_reorder(Continente, hi_pct),
         etiqueta = sprintf("%.1f%%", hi_pct)) %>%
  ggplot(aes(x = "", y = hi_pct, fill = Continente)) +
  geom_col(width = 1, color = "white") +
  geom_text(aes(label = etiqueta),
            position = position_stack(vjust = 0.5),
            size = 4, fontface = "bold") +
  coord_polar(theta = "y") +
  scale_fill_manual(values = colores) +
  labs(title = "Gráfica N. 3: Distribución porcentual por continente",
       fill = "Continente",
       caption = pie_label) +
  theme_void(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
    plot.caption = element_text(color = "#888888", size = 9, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(face = "bold", size = 10),
    plot.background = element_rect(fill = "white", color = NA)
  )
La variable Continente es cualitativa nominal. Para este tipo de variable, el único indicador de tendencia central aplicable es la moda.
# Mode of Continente: the category with the highest absolute frequency,
# together with its count and its percentage rounded to two decimals.
moda <- with(tabla_freq, Continente[which.max(ni)])
moda_n <- with(tabla_freq, max(ni))
moda_pct <- with(tabla_freq, round(hi_pct[which.max(ni)], 2))
# Table N°2: one-row summary of descriptive indicators for Continente.
# Only the mode is reported; every other indicator is shown as "-".
# The "Rango" cell is built from the categories in tabla_freq instead of a
# hand-typed list, so it cannot drift from the data. For the current table
# it produces the same text as before.
data.frame(
  "Variable" = "Continente",
  "Rango" = paste0(
    "D={",
    paste0("'", tabla_freq$Continente, "'", collapse = ", "),
    "}"
  ),
  "Media (X)" = "-",
  "Mediana (Me)" = "-",
  "Moda (Mo)" = moda,
  "Varianza (V)" = "-",
  "Desv. Est. (Sd)" = "-",
  "C.V. (%)" = "-",
  "Asimetría (As)" = "-",
  "Curtosis (K)" = "-",
  check.names = FALSE  # keep the spaces and parentheses in the column names
) %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°2 de Conclusiones — Continente en yacimientos de petróleo y gas**")
  ) %>%
  tab_source_note("Autor: Grupo 5") %>%
  tab_options(
    table.width = pct(100),
    table.font.size = px(12),
    table.font.names = "Arial",
    heading.align = "center",
    heading.title.font.size = px(13),
    heading.background.color = "#AAAAAA",
    heading.border.bottom.color = "#AAAAAA",
    column_labels.font.weight = "normal",
    column_labels.background.color = "#FFFFFF",
    column_labels.border.top.color = "#CCCCCC",
    column_labels.border.top.width = px(1),
    column_labels.border.bottom.color = "#CCCCCC",
    column_labels.border.bottom.width = px(1),
    table_body.border.bottom.color = "#CCCCCC",
    table_body.border.bottom.width = px(1),
    table.border.top.color = "#AAAAAA",
    table.border.top.width = px(1),
    table.border.bottom.color = "#AAAAAA",
    table.border.bottom.width = px(1),
    source_notes.font.size = px(11),
    source_notes.border.lr.color = "transparent",
    data_row.padding = px(5)
  ) %>%
  # White bold title on the grey heading band.
  tab_style(
    style = cell_text(color = "white", weight = "bold"),
    locations = cells_title(groups = "title")
  ) %>%
  # Centred dark-grey column labels.
  tab_style(
    style = cell_text(color = "#333333", align = "center"),
    locations = cells_column_labels()
  ) %>%
  # Centre every indicator cell; "Variable" and "Rango" keep the default
  # alignment.
  tab_style(
    style = cell_text(color = "#333333", align = "center"),
    locations = cells_body(columns = c("Media (X)", "Mediana (Me)", "Moda (Mo)",
                                       "Varianza (V)", "Desv. Est. (Sd)",
                                       "C.V. (%)", "Asimetría (As)", "Curtosis (K)"))
  )
| Tabla N°2 de Conclusiones — Continente en yacimientos de petróleo y gas | |||||||||
| Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) |
|---|---|---|---|---|---|---|---|---|---|
| Continente | D={'América del Norte', 'Europa', 'América del Sur', 'Asia', 'África', 'Europa/Asia', 'Oceanía'} | - | - | América del Norte | - | - | - | - | - |
| Autor: Grupo 5 | |||||||||
La moda de la variable “Continente” es “América del Norte”, con 27,405 de los 49,212 registros (55.69 % del total), lo que refleja la fuerte concentración de unidades de extracción de petróleo y gas en esa región dentro del dataset analizado.