1 Configuración y Carga de Datos

En esta sección se cargan las librerías necesarias y se importa el dataset Global Oil and Gas Extraction Tracker (GOGET), que contiene registros de unidades de extracción de petróleo y gas a nivel mundial.

library(readxl)
library(dplyr)
library(gt)
library(ggplot2)
library(scales)
library(forcats)
setwd("C:/Users/ronny/Downloads/Dataset")
datos_raw <- read_excel("dataset_mundial_petro.xlsx")

# Filtrar solo registros con Fuel Type válido (excluir filas secundarias/nulas)
datos <- datos_raw %>%
  filter(!is.na(`Fuel type`), !is.na(`Unit type`))

cat("Número de registros válidos:", nrow(datos), "\n")
## Número de registros válidos: 8334
cat("Número de variables:", ncol(datos), "\n")
## Número de variables: 32

2 Extracción y Conteo Inicial

Se extrae la variable Fuel Type (Tipo de Combustible) del dataset. Esta variable es de escala nominal, por lo que el análisis de distribución de frecuencias aplica categorías sin jerarquía intrínseca.

n <- nrow(datos)

conteo_inicial <- datos %>%
  count(`Fuel type`, name = "fi") %>%
  arrange(desc(fi))

k <- nrow(conteo_inicial)
cat("Total de categorías (Fuel Type):", k, "\n")
## Total de categorías (Fuel Type): 4
cat("Categoría más frecuente:", conteo_inicial$`Fuel type`[1],
    "con", conteo_inicial$fi[1], "registros\n")
## Categoría más frecuente: oil and gas con 5833 registros
cat("Categoría menos frecuente:", conteo_inicial$`Fuel type`[k],
    "con", conteo_inicial$fi[k], "registro(s)\n")
## Categoría menos frecuente: gas and condensate con 31 registro(s)

Vista previa del conteo:

conteo_inicial

3 Categorización y Ordenamiento Lógico

Dado que Fuel Type es una variable cualitativa nominal con 4 categorías, se organiza en orden descendente por frecuencia absoluta (fi).

tabla_freq <- conteo_inicial %>%
  rename(FuelType = `Fuel type`) %>%
  mutate(
    hi_prop = fi / n,
    hi_pct  = hi_prop * 100,
    Fi      = cumsum(fi),
    Hi_prop = cumsum(hi_prop),
    Hi_pct  = cumsum(hi_pct)
  ) %>%
  mutate(i = row_number()) %>%
  select(i, FuelType, fi, hi_pct, hi_prop)

cat("Tabla generada con", nrow(tabla_freq), "categorías.\n")
## Tabla generada con 4 categorías.
cat("Verificación — suma de fi:", sum(tabla_freq$fi), "(debe ser", n, ")\n")
## Verificación — suma de fi: 8334 (debe ser 8334 )
cat("Verificación — suma de hi (%):", round(sum(tabla_freq$hi_pct), 3), "(debe ser 100)\n")
## Verificación — suma de hi (%): 100 (debe ser 100)

4 Tabla de Distribución de Frecuencias

tabla_freq %>%
  gt() %>%
  tab_header(
    title    = md("**Tabla N. 9**"),
    subtitle = md("Distribución de frecuencias por tipo de combustible — yacimientos de petróleo y gas")
  ) %>%
  cols_label(
    i        = md("**N°**"),
    FuelType = md("**Tipo de combustible**"),
    fi       = md("**ni**"),
    hi_pct   = md("**(%)** "),
    hi_prop  = md("**(proporción)**")
  ) %>%
  tab_spanner(
    label   = md("**hi**"),
    columns = c(hi_pct, hi_prop)
  ) %>%
  fmt_number(columns = hi_pct,  decimals = 2) %>%
  fmt_number(columns = hi_prop, decimals = 3) %>%
  fmt_number(columns = fi,      decimals = 0, use_seps = TRUE) %>%
  grand_summary_rows(
    columns = c(fi, hi_pct, hi_prop),
    fns = list(label = "Total", fn = "sum"),
    fmt = list(
      ~ fmt_number(., columns = fi,      decimals = 0, use_seps = TRUE),
      ~ fmt_number(., columns = hi_pct,  decimals = 2),
      ~ fmt_number(., columns = hi_prop, decimals = 3)
    )
  ) %>%
  tab_source_note(source_note = "Autor: Grupo 5") %>%
  tab_options(
    table.width                       = pct(75),
    table.font.size                   = px(13),
    table.font.names                  = "Arial",
    heading.title.font.size           = px(15),
    heading.subtitle.font.size        = px(12),
    heading.align                     = "center",
    heading.background.color          = "#AAAAAA",
    heading.border.bottom.color       = "#AAAAAA",
    heading.border.bottom.width       = px(1),
    column_labels.font.weight         = "bold",
    column_labels.background.color    = "#FFFFFF",
    column_labels.border.top.color    = "#AAAAAA",
    column_labels.border.top.width    = px(1),
    column_labels.border.bottom.color = "#AAAAAA",
    column_labels.border.bottom.width = px(1),
    row.striping.include_table_body   = FALSE,
    source_notes.font.size            = px(11),
    source_notes.border.lr.color      = "transparent",
    table.border.top.color            = "#AAAAAA",
    table.border.top.width            = px(1),
    table.border.bottom.color         = "#AAAAAA",
    table.border.bottom.width         = px(1)
  ) %>%
  tab_style(
    style     = cell_text(color = "white", weight = "bold"),
    locations = cells_title(groups = c("title", "subtitle"))
  ) %>%
  tab_style(
    style     = cell_text(color = "#000000", weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_style(
    style     = cell_text(color = "#000000", weight = "bold"),
    locations = cells_column_spanners()
  ) %>%
  tab_style(
    style     = list(
      cell_text(weight = "bold", color = "#000000"),
      cell_borders(sides = "top", color = "#333333", weight = px(2))
    ),
    locations = cells_grand_summary()
  ) %>%
  tab_style(
    style     = cell_text(color = "#333333"),
    locations = cells_body()
  )
Tabla N. 9
Distribución de frecuencias por tipo de combustible — yacimientos de petróleo y gas
Tipo de combustible ni
hi
(%) (proporción)
1 oil and gas 5,833 69.99 0.700
2 gas 1,237 14.84 0.148
3 oil 1,233 14.79 0.148
4 gas and condensate 31 0.37 0.004
Total 8,334 100.00 1.000
Autor: Grupo 5

5 Análisis Gráfico

Para la variable cualitativa nominal Fuel Type, se presentan diagramas de barras (frecuencia absoluta y porcentual) y diagrama circular.

# Preparar datos para gráficos
fuel_graf <- tabla_freq %>%
  mutate(FuelType = fct_reorder(FuelType, fi))

# Paleta azules suaves
colores_fuel <- c(
  "oil and gas"       = "#AED6F1",
  "gas"               = "#5DADE2",
  "oil"               = "#2E86C1",
  "gas and condensate"= "#1A5276"
)

# Tema base limpio
tema_base <- theme_minimal(base_size = 12) +
  theme(
    legend.position    = "none",
    plot.title         = element_text(face = "bold", color = "#1A1A1A", size = 13),
    plot.subtitle      = element_text(color = "#AAAAAA", size = 10),
    plot.caption       = element_text(color = "#888888", size = 9, hjust = 0),
    axis.title         = element_text(face = "bold", color = "#333333", size = 11),
    axis.text          = element_text(color = "#333333"),
    axis.text.y        = element_text(face = "bold", color = "#222222"),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "#EEEEEE"),
    panel.grid.minor   = element_blank(),
    plot.background    = element_rect(fill = "white", color = NA),
    panel.background   = element_rect(fill = "white", color = NA)
  )

5.1 Diagrama de Barras — Frecuencia Absoluta

ggplot(fuel_graf, aes(x = FuelType, y = fi, fill = FuelType)) +
  geom_col(width = 0.55, color = "white", linewidth = 0.3) +
  geom_text(
    aes(label = format(fi, big.mark = ",")),
    hjust = -0.1, size = 3.5, color = "#222222", fontface = "bold"
  ) +
  coord_flip() +
  scale_fill_manual(values = colores_fuel) +
  scale_y_continuous(
    labels = label_comma(),
    expand = expansion(mult = c(0, 0.18))
  ) +
  labs(
    title   = "Gráfica N. 1: Distribución de yacimientos por tipo de combustible",
    x       = "Fuel Type",
    y       = "Frecuencia Absoluta (ni)",
    caption = paste0("n = ", format(n, big.mark = ","),
                     " | Fuente: Global Energy Monitor — GOGET 2023")
  ) +
  tema_base

5.2 Diagrama de Barras — Frecuencia Relativa Porcentual

ggplot(fuel_graf, aes(x = FuelType, y = hi_pct, fill = FuelType)) +
  geom_col(width = 0.55, color = "white", linewidth = 0.3) +
  geom_text(
    aes(label = paste0(round(hi_pct, 2), "%")),
    hjust = -0.1, size = 3.5, color = "#222222", fontface = "bold"
  ) +
  coord_flip() +
  scale_fill_manual(values = colores_fuel) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.18))
  ) +
  labs(
    title   = "Gráfica N. 2: Distribución porcentual de yacimientos por tipo de combustible",
    x       = "Fuel Type",
    y       = "Frecuencia Relativa (%)",
    caption = paste0("n = ", format(n, big.mark = ","),
                     " | Fuente: Global Energy Monitor — GOGET 2023")
  ) +
  tema_base

5.3 Diagrama Circular

datos_pie <- tabla_freq %>%
  arrange(desc(fi)) %>%
  mutate(
    FuelType = fct_reorder(FuelType, hi_pct),
    etiqueta = paste0(round(hi_pct, 1), "%")
  )

ggplot(datos_pie, aes(x = "", y = hi_pct, fill = FuelType)) +
  geom_col(width = 1, color = "white", linewidth = 0.6) +
  geom_text(
    aes(label = etiqueta),
    position = position_stack(vjust = 0.5),
    size = 4, color = "#1A1A1A", fontface = "bold"
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = colores_fuel) +
  labs(
    title   = "Gráfica N. 3: Distribución Porcentual por Tipo de Combustible",
    fill    = "Fuel Type",
    caption = paste0("n = ", format(n, big.mark = ","),
                     " | Fuente: Global Energy Monitor — GOGET 2023")
  ) +
  theme_void(base_size = 12) +
  theme(
    plot.title      = element_text(face = "bold", color = "#1A1A1A",
                                   size = 13, hjust = 0.5),
    plot.caption    = element_text(color = "#888888", size = 9, hjust = 0.5),
    legend.position = "right",
    legend.title    = element_text(face = "bold", color = "#1A1A1A", size = 10),
    legend.text     = element_text(size = 9, color = "#333333"),
    plot.background = element_rect(fill = "white", color = NA)
  )


6 Indicadores Estadísticos

# Moda
moda_fuel <- tabla_freq$FuelType[which.max(tabla_freq$fi)]

# Tabla de indicadores
tabla_indicadores <- data.frame(
  "Variable"        = "Fuel Type",
  "Rango"           = "Tipos de combustible",
  "Media (X)"       = "-",
  "Mediana (Me)"    = "-",
  "Moda (Mo)"       = moda_fuel,
  "Varianza (V)"    = "-",
  "Desv. Est. (Sd)" = "-",
  "C.V. (%)"        = "-",
  "Asimetría (As)"  = "-",
  "Curtosis (K)"    = "-",
  check.names = FALSE
)
tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°3 de Conclusiones — Tipo de Combustible en yacimientos de petróleo y gas**")
  ) %>%
  tab_source_note(source_note = "Autor: Grupo 5") %>%
  tab_options(
    table.width                       = pct(95),
    table.font.size                   = px(13),
    table.font.names                  = "Arial",
    heading.title.font.size           = px(15),
    heading.align                     = "center",
    heading.background.color          = "#AAAAAA",
    heading.border.bottom.color       = "#AAAAAA",
    heading.border.bottom.width       = px(1),
    column_labels.font.weight         = "bold",
    column_labels.background.color    = "#F0F0F0",
    column_labels.border.top.color    = "#AAAAAA",
    column_labels.border.top.width    = px(1),
    column_labels.border.bottom.color = "#AAAAAA",
    column_labels.border.bottom.width = px(1),
    row.striping.include_table_body   = FALSE,
    source_notes.font.size            = px(11),
    source_notes.border.lr.color      = "transparent",
    table.border.top.color            = "#AAAAAA",
    table.border.top.width            = px(1),
    table.border.bottom.color         = "#AAAAAA",
    table.border.bottom.width         = px(2)
  ) %>%
  tab_style(
    style     = cell_text(color = "white", weight = "bold"),
    locations = cells_title(groups = "title")
  ) %>%
  tab_style(
    style     = cell_text(color = "#000000", weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_style(
    style     = cell_text(color = "#AAAAAA"),
    locations = cells_body()
  )
Tabla N°3 de Conclusiones — Tipo de Combustible en yacimientos de petróleo y gas
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K)
Fuel Type Tipos de combustible - - oil and gas - - - - -
Autor: Grupo 5