Se cargan las librerías necesarias y el dataset Global Oil and Gas Extraction Tracker (GOGET), que contiene registros de unidades de extracción de petróleo y gas a nivel mundial.
library(readxl)
library(dplyr)
library(gt)
library(ggplot2)
library(scales)
library(forcats)
# Load the GOGET workbook and keep only rows where both `Fuel type` and
# `Unit type` are present (neither a real NA nor the literal text "NA").
# NOTE(review): setwd() with an absolute, user-specific path ties this script
# to one machine; consider a project-relative path instead.
setwd("C:/Users/ronny/Downloads/Dataset")
datos <- filter(
  read_excel("dataset_mundial_petro.xlsx"),
  !(is.na(`Fuel type`) | `Fuel type` == "NA"),
  !(is.na(`Unit type`) | `Unit type` == "NA")
)
cat("Registros válidos:", nrow(datos), "\n")
## Registros válidos: 8334
## Variables: 32
Se extrae la variable Fuel Type (Tipo de Combustible). Es una variable de escala nominal: sus categorías no tienen orden jerárquico intrínseco.
## Variable analizada: Fuel Type
## Total de observaciones (n): 8334
## Categorías presentes: oil and gas, gas, oil, gas and condensate
Se calcula la frecuencia absoluta (nᵢ), la frecuencia relativa en proporción (hᵢ) y en porcentaje (hᵢ %) para cada categoría, ordenadas de mayor a menor.
# Frequency table for `Fuel type`, one row per category, sorted from the most
# to the least frequent:
#   i               rank of the category after sorting
#   TipoCombustible category label
#   ni              absolute frequency
#   hi_pct          relative frequency in percent
#   hi_prop         relative frequency as a proportion
# The denominator is the table's own total, sum(ni), so the chunk is
# self-contained and does not rely on a global `n` defined elsewhere (if that
# global were missing, `n` would resolve to the dplyr function and fail).
tabla_freq <- datos %>%
  count(`Fuel type`, name = "ni", sort = TRUE) %>%
  rename(TipoCombustible = `Fuel type`) %>%
  mutate(
    hi_prop = ni / sum(ni),
    hi_pct = hi_prop * 100,
    i = row_number()
  ) %>%
  select(i, TipoCombustible, ni, hi_pct, hi_prop)
# k = number of fuel-type categories (rows of the frequency table).
k <- nrow(tabla_freq)
cat("Número de categorías (k):", k, "\n")
## Número de categorías (k): 4

# Report the first and the k-th row of the frequency table (label and count).
with(
  tabla_freq,
  cat("Categoría más frecuente :", TipoCombustible[1],
      "—", ni[1], "registros\n")
)
## Categoría más frecuente : oil and gas — 5833 registros
with(
  tabla_freq,
  cat("Categoría menos frecuente:", TipoCombustible[k],
      "—", ni[k], "registro(s)\n")
)
## Categoría menos frecuente: gas and condensate — 31 registro(s)
## Verificación — Σnᵢ: 8334 (debe ser 8334 )
## Verificación — Σhᵢ%: 100 (debe ser 100)
# Table N. 1: render the frequency table (tabla_freq) as a formatted gt table.
tabla_freq %>%
gt() %>%
# Title and subtitle, written as markdown.
tab_header(
title = md("**Tabla N. 1**"),
subtitle = md("Distribución de frecuencias por tipo de combustible — yacimientos de petróleo y gas")
) %>%
# Display labels for each column (bold via markdown).
cols_label(
i = md("**N°**"),
TipoCombustible = md("**Tipo de combustible**"),
ni = md("**nᵢ**"),
hi_pct = md("**(%)** "),
hi_prop = md("**(proporción)**")
) %>%
# Group the two relative-frequency columns under a single "hᵢ" spanner.
tab_spanner(label = md("**hᵢ**"), columns = c(hi_pct, hi_prop)) %>%
# Number formats: counts without decimals, percent with 2 decimals,
# proportion with 4 decimals.
fmt_number(columns = ni, decimals = 0, use_seps = TRUE) %>%
fmt_number(columns = hi_pct, decimals = 2) %>%
fmt_number(columns = hi_prop, decimals = 4) %>%
# "Total" row with the column sums, formatted like the body cells.
grand_summary_rows(
columns = c(ni, hi_pct, hi_prop),
fns = list(label = "Total", fn = "sum"),
fmt = list(
~ fmt_number(., columns = ni, decimals = 0, use_seps = TRUE),
~ fmt_number(., columns = hi_pct, decimals = 2),
~ fmt_number(., columns = hi_prop, decimals = 4)
)
) %>%
tab_source_note("Autor: Grupo 5") %>%
# Table-wide layout: width, fonts, grey heading background and grey borders.
tab_options(
table.width = pct(75),
table.font.size = px(13),
table.font.names = "Arial",
heading.title.font.size = px(15),
heading.subtitle.font.size = px(12),
heading.align = "center",
heading.background.color = "#AAAAAA",
column_labels.font.weight = "bold",
column_labels.background.color = "#FFFFFF",
column_labels.border.top.color = "#AAAAAA",
column_labels.border.bottom.color = "#AAAAAA",
table.border.top.color = "#AAAAAA",
table.border.bottom.color = "#AAAAAA"
) %>%
# White bold text for title and subtitle (the heading background is grey).
tab_style(
style = cell_text(color = "white", weight = "bold"),
locations = cells_title(groups = c("title", "subtitle"))
) %>%
# Bold text for column labels, spanners and the grand-summary row.
tab_style(
style = cell_text(weight = "bold"),
locations = list(cells_column_labels(), cells_column_spanners(), cells_grand_summary())
# NOTE(review): the text after the closing parenthesis on the next line is
# rendered table output fused onto the code line, not R code.
)| Tabla N. 1 | |||||
| Distribución de frecuencias por tipo de combustible — yacimientos de petróleo y gas | |||||
| N° | Tipo de combustible | nᵢ |
hᵢ
|
||
|---|---|---|---|---|---|
| (%) | (proporción) | ||||
| 1 | oil and gas | 5,833 | 69.99 | 0.6999 | |
| 2 | gas | 1,237 | 14.84 | 0.1484 | |
| 3 | oil | 1,233 | 14.79 | 0.1479 | |
| 4 | gas and condensate | 31 | 0.37 | 0.0037 | |
| Total | — | — | 8,334 | 100.00 | 1.0000 |
| Autor: Grupo 5 | |||||
# Plot data: the frequency table with the category turned into a factor whose
# levels are ordered by increasing ni (this fixes the bar order on the x axis).
fuel_graf <- mutate(
  tabla_freq,
  TipoCombustible = fct_reorder(TipoCombustible, ni)
)

# Fill colour assigned to each fuel-type category.
colores <- setNames(
  c("#AED6F1", "#5DADE2", "#2E86C1", "#1A5276"),
  c("oil and gas", "gas", "oil", "gas and condensate")
)

# Theme for vertical bar charts: horizontal grid lines only, readable x axis,
# no legend.
tema_base <- theme_minimal(base_size = 12) +
  theme(
    plot.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(size = 13, face = "bold"),
    plot.caption = element_text(size = 9, hjust = 0, color = "#888888"),
    axis.title = element_text(size = 11, face = "bold"),
    axis.text.x = element_text(angle = 15, hjust = 1, face = "bold"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "#EEEEEE"),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
# Caption shared by the three charts. The sample size is taken from the
# frequency table itself (sum of ni), so this chunk does not depend on a
# global `n` defined elsewhere.
pie_label <- paste0(
  "n = ", format(sum(tabla_freq$ni), big.mark = ","),
  " | Fuente: Global Energy Monitor — GOGET 2023"
)

# Gráfica N. 1: bar chart of absolute frequencies (ni) per fuel type.
ggplot(fuel_graf, aes(x = TipoCombustible, y = ni, fill = TipoCombustible)) +
  geom_col(width = 0.55, color = "white") +
  # trim = TRUE stops format() from left-padding the shorter labels to a
  # common width, which would draw them off-centre above their bars.
  geom_text(aes(label = format(ni, big.mark = ",", trim = TRUE)),
            vjust = -0.4, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = colores) +
  # Extra 12 % headroom at the top so the value labels are not clipped.
  scale_y_continuous(labels = label_comma(),
                     expand = expansion(mult = c(0, 0.12))) +
  labs(title = "Gráfica N. 1: Distribución de yacimientos por tipo de combustible",
       x = "Tipo de combustible", y = "Frecuencia Absoluta (nᵢ)",
       caption = pie_label) +
  tema_base

# Gráfica N. 2: bar chart of relative frequencies in percent.
ggplot(fuel_graf, aes(x = TipoCombustible, y = hi_pct, fill = TipoCombustible)) +
  geom_col(width = 0.55, color = "white") +
  geom_text(aes(label = paste0(round(hi_pct, 2), "%")),
            vjust = -0.4, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = colores) +
  scale_y_continuous(labels = function(x) paste0(x, "%"),
                     expand = expansion(mult = c(0, 0.12))) +
  labs(title = "Gráfica N. 2: Distribución porcentual por tipo de combustible",
       x = "Tipo de combustible", y = "Frecuencia Relativa (%)",
       caption = pie_label) +
  tema_base

# Gráfica N. 3: pie chart of relative frequencies, built as a single stacked
# bar drawn in polar coordinates.
tabla_freq %>%
  mutate(TipoCombustible = fct_reorder(TipoCombustible, hi_pct),
         etiqueta = paste0(round(hi_pct, 1), "%")) %>%
  ggplot(aes(x = "", y = hi_pct, fill = TipoCombustible)) +
  geom_col(width = 1, color = "white") +
  # Place each percentage label at the middle of its slice.
  geom_text(aes(label = etiqueta),
            position = position_stack(vjust = 0.5),
            size = 4, fontface = "bold") +
  coord_polar(theta = "y") +
  scale_fill_manual(values = colores) +
  labs(title = "Gráfica N. 3: Distribución porcentual por tipo de combustible",
       fill = "Tipo de combustible",
       caption = pie_label) +
  theme_void(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
    plot.caption = element_text(color = "#888888", size = 9, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(face = "bold", size = 10),
    plot.background = element_rect(fill = "white", color = NA)
  )
La variable Tipo de Combustible es cualitativa nominal. Para este tipo de variable, el único indicador de tendencia central aplicable es la moda.
# Mode of the variable: the category with the largest absolute frequency
# (which.max returns the first maximum if there are ties).
moda <- tabla_freq$TipoCombustible[which.max(tabla_freq$ni)]
# Count and percentage (rounded to 2 decimals) of the modal category.
# NOTE(review): moda_n and moda_pct are not referenced in the code visible
# here; confirm they are used elsewhere (e.g. in inline text).
moda_n <- max(tabla_freq$ni)
moda_pct <- round(tabla_freq$hi_pct[which.max(tabla_freq$ni)], 2)
# Table N°2: one-row summary of descriptive indicators. Only the mode is
# filled in; every other indicator is shown as "-".
data.frame(
"Variable" = "Fuel Type",
# NOTE(review): this domain string is hard-coded and its capitalisation
# ('Oil', 'Gas') differs from the lowercase category labels used elsewhere in
# this script; consider building it from tabla_freq$TipoCombustible.
"Rango" = "D={'Oil', 'Gas', 'Oil and gas', 'gas and condensate'}",
"Media (X)" = "-",
"Mediana (Me)" = "-",
"Moda (Mo)" = moda,
"Varianza (V)" = "-",
"Desv. Est. (Sd)" = "-",
"C.V. (%)" = "-",
"Asimetría (As)" = "-",
"Curtosis (K)" = "-",
# Keep the column names exactly as written (spaces, dots, parentheses).
check.names = FALSE
) %>%
gt() %>%
tab_header(
title = md("**Tabla N°2 de Conclusiones — Tipo de Combustible en yacimientos de petróleo y gas**")
) %>%
tab_source_note("Autor: Grupo 5") %>%
# Table-wide layout: full width, fonts, grey heading background, thin borders.
tab_options(
table.width = pct(100),
table.font.size = px(12),
table.font.names = "Arial",
heading.align = "center",
heading.title.font.size = px(13),
heading.background.color = "#AAAAAA",
heading.border.bottom.color = "#AAAAAA",
column_labels.font.weight = "normal",
column_labels.background.color = "#FFFFFF",
column_labels.border.top.color = "#CCCCCC",
column_labels.border.top.width = px(1),
column_labels.border.bottom.color = "#CCCCCC",
column_labels.border.bottom.width = px(1),
table_body.border.bottom.color = "#CCCCCC",
table_body.border.bottom.width = px(1),
table.border.top.color = "#AAAAAA",
table.border.top.width = px(1),
table.border.bottom.color = "#AAAAAA",
table.border.bottom.width = px(1),
source_notes.font.size = px(11),
source_notes.border.lr.color = "transparent",
data_row.padding = px(5)
) %>%
# White bold title text (the heading background is grey).
tab_style(
style = cell_text(color = "white", weight = "bold"),
locations = cells_title(groups = "title")
) %>%
# Dark grey, centred column labels.
tab_style(
style = cell_text(color = "#333333", align = "center"),
locations = cells_column_labels()
) %>%
# Dark grey, centred body cells for the indicator columns ("Variable" and
# "Rango" are not included in this style).
tab_style(
style = cell_text(color = "#333333", align = "center"),
locations = cells_body(columns = c("Media (X)", "Mediana (Me)", "Moda (Mo)",
"Varianza (V)", "Desv. Est. (Sd)",
"C.V. (%)", "Asimetría (As)", "Curtosis (K)"))
# NOTE(review): the text after the closing parenthesis on the next line is
# rendered table output fused onto the code line, not R code.
)| Tabla N°2 de Conclusiones — Tipo de Combustible en yacimientos de petróleo y gas | |||||||||
| Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) |
|---|---|---|---|---|---|---|---|---|---|
| Fuel Type | D={'Oil', 'Gas', 'Oil and gas', 'gas and condensate'} | - | - | oil and gas | - | - | - | - | - |
| Autor: Grupo 5 | |||||||||
La moda de la variable “tipo de combustible” es “oil and gas” (petróleo y gas): concentra 5,833 de los 8,334 registros válidos (69.99 %), muy por encima de “gas” (14.84 %), “oil” (14.79 %) y “gas and condensate” (0.37 %).