En esta sección se cargan las librerías necesarias y se importa el dataset Global Oil and Gas Extraction Tracker (GOGET), que contiene registros de unidades de extracción de petróleo y gas a nivel mundial.
setwd("C:/Users/ronny/Downloads/Dataset")
datos_raw <- read_excel("dataset_mundial_petro.xlsx")
# Filtrar solo registros con Fuel Type válido (excluir filas secundarias/nulas)
datos <- datos_raw %>%
filter(!is.na(`Fuel type`), !is.na(`Unit type`))
cat("Número de registros válidos:", nrow(datos), "\n")## Número de registros válidos: 8334
## Número de variables: 32
Se extrae la variable Fuel Type (Tipo de Combustible) del dataset. Esta variable es de escala nominal, por lo que el análisis de distribución de frecuencias aplica categorías sin jerarquía intrínseca.
n <- nrow(datos)
conteo_inicial <- datos %>%
count(`Fuel type`, name = "fi") %>%
arrange(desc(fi))
k <- nrow(conteo_inicial)
cat("Total de categorías (Fuel Type):", k, "\n")## Total de categorías (Fuel Type): 4
cat("Categoría más frecuente:", conteo_inicial$`Fuel type`[1],
"con", conteo_inicial$fi[1], "registros\n")## Categoría más frecuente: oil and gas con 5833 registros
cat("Categoría menos frecuente:", conteo_inicial$`Fuel type`[k],
"con", conteo_inicial$fi[k], "registro(s)\n")## Categoría menos frecuente: gas and condensate con 31 registro(s)
Vista previa del conteo:
Dado que Fuel Type es una variable cualitativa nominal con 4 categorías, se organiza en orden descendente por frecuencia absoluta (fi).
tabla_freq <- conteo_inicial %>%
rename(FuelType = `Fuel type`) %>%
mutate(
hi_prop = fi / n,
hi_pct = hi_prop * 100,
Fi = cumsum(fi),
Hi_prop = cumsum(hi_prop),
Hi_pct = cumsum(hi_pct)
) %>%
mutate(i = row_number()) %>%
select(i, FuelType, fi, hi_pct, hi_prop)
cat("Tabla generada con", nrow(tabla_freq), "categorías.\n")## Tabla generada con 4 categorías.
## Verificación — suma de fi: 8334 (debe ser 8334 )
## Verificación — suma de hi (%): 100 (debe ser 100)
tabla_freq %>%
gt() %>%
tab_header(
title = md("**Tabla N. 9**"),
subtitle = md("Distribución de frecuencias por tipo de combustible — yacimientos de petróleo y gas")
) %>%
cols_label(
i = md("**N°**"),
FuelType = md("**Tipo de combustible**"),
fi = md("**ni**"),
hi_pct = md("**(%)** "),
hi_prop = md("**(proporción)**")
) %>%
tab_spanner(
label = md("**hi**"),
columns = c(hi_pct, hi_prop)
) %>%
fmt_number(columns = hi_pct, decimals = 2) %>%
fmt_number(columns = hi_prop, decimals = 3) %>%
fmt_number(columns = fi, decimals = 0, use_seps = TRUE) %>%
grand_summary_rows(
columns = c(fi, hi_pct, hi_prop),
fns = list(label = "Total", fn = "sum"),
fmt = list(
~ fmt_number(., columns = fi, decimals = 0, use_seps = TRUE),
~ fmt_number(., columns = hi_pct, decimals = 2),
~ fmt_number(., columns = hi_prop, decimals = 3)
)
) %>%
tab_source_note(source_note = "Autor: Grupo 5") %>%
tab_options(
table.width = pct(75),
table.font.size = px(13),
table.font.names = "Arial",
heading.title.font.size = px(15),
heading.subtitle.font.size = px(12),
heading.align = "center",
heading.background.color = "#AAAAAA",
heading.border.bottom.color = "#AAAAAA",
heading.border.bottom.width = px(1),
column_labels.font.weight = "bold",
column_labels.background.color = "#FFFFFF",
column_labels.border.top.color = "#AAAAAA",
column_labels.border.top.width = px(1),
column_labels.border.bottom.color = "#AAAAAA",
column_labels.border.bottom.width = px(1),
row.striping.include_table_body = FALSE,
source_notes.font.size = px(11),
source_notes.border.lr.color = "transparent",
table.border.top.color = "#AAAAAA",
table.border.top.width = px(1),
table.border.bottom.color = "#AAAAAA",
table.border.bottom.width = px(1)
) %>%
tab_style(
style = cell_text(color = "white", weight = "bold"),
locations = cells_title(groups = c("title", "subtitle"))
) %>%
tab_style(
style = cell_text(color = "#000000", weight = "bold"),
locations = cells_column_labels()
) %>%
tab_style(
style = cell_text(color = "#000000", weight = "bold"),
locations = cells_column_spanners()
) %>%
tab_style(
style = list(
cell_text(weight = "bold", color = "#000000"),
cell_borders(sides = "top", color = "#333333", weight = px(2))
),
locations = cells_grand_summary()
) %>%
tab_style(
style = cell_text(color = "#333333"),
locations = cells_body()
)| Tabla N. 9 | |||||
| Distribución de frecuencias por tipo de combustible — yacimientos de petróleo y gas | |||||
| N° | Tipo de combustible | ni |
hi
|
||
|---|---|---|---|---|---|
| (%) | (proporción) | ||||
| 1 | oil and gas | 5,833 | 69.99 | 0.700 | |
| 2 | gas | 1,237 | 14.84 | 0.148 | |
| 3 | oil | 1,233 | 14.79 | 0.148 | |
| 4 | gas and condensate | 31 | 0.37 | 0.004 | |
| Total | — | — | 8,334 | 100.00 | 1.000 |
| Autor: Grupo 5 | |||||
Para la variable cualitativa nominal Fuel Type, se presentan diagramas de barras (frecuencia absoluta y porcentual) y diagrama circular.
# Preparar datos para gráficos
fuel_graf <- tabla_freq %>%
mutate(FuelType = fct_reorder(FuelType, fi))
# Paleta azules suaves
colores_fuel <- c(
"oil and gas" = "#AED6F1",
"gas" = "#5DADE2",
"oil" = "#2E86C1",
"gas and condensate"= "#1A5276"
)
# Tema base limpio
tema_base <- theme_minimal(base_size = 12) +
theme(
legend.position = "none",
plot.title = element_text(face = "bold", color = "#1A1A1A", size = 13),
plot.subtitle = element_text(color = "#AAAAAA", size = 10),
plot.caption = element_text(color = "#888888", size = 9, hjust = 0),
axis.title = element_text(face = "bold", color = "#333333", size = 11),
axis.text = element_text(color = "#333333"),
axis.text.y = element_text(face = "bold", color = "#222222"),
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "#EEEEEE"),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
)ggplot(fuel_graf, aes(x = FuelType, y = fi, fill = FuelType)) +
geom_col(width = 0.55, color = "white", linewidth = 0.3) +
geom_text(
aes(label = format(fi, big.mark = ",")),
hjust = -0.1, size = 3.5, color = "#222222", fontface = "bold"
) +
coord_flip() +
scale_fill_manual(values = colores_fuel) +
scale_y_continuous(
labels = label_comma(),
expand = expansion(mult = c(0, 0.18))
) +
labs(
title = "Gráfica N. 1: Distribución de yacimientos por tipo de combustible",
x = "Fuel Type",
y = "Frecuencia Absoluta (ni)",
caption = paste0("n = ", format(n, big.mark = ","),
" | Fuente: Global Energy Monitor — GOGET 2023")
) +
tema_baseggplot(fuel_graf, aes(x = FuelType, y = hi_pct, fill = FuelType)) +
geom_col(width = 0.55, color = "white", linewidth = 0.3) +
geom_text(
aes(label = paste0(round(hi_pct, 2), "%")),
hjust = -0.1, size = 3.5, color = "#222222", fontface = "bold"
) +
coord_flip() +
scale_fill_manual(values = colores_fuel) +
scale_y_continuous(
labels = function(x) paste0(x, "%"),
expand = expansion(mult = c(0, 0.18))
) +
labs(
title = "Gráfica N. 2: Distribución porcentual de yacimientos por tipo de combustible",
x = "Fuel Type",
y = "Frecuencia Relativa (%)",
caption = paste0("n = ", format(n, big.mark = ","),
" | Fuente: Global Energy Monitor — GOGET 2023")
) +
tema_basedatos_pie <- tabla_freq %>%
arrange(desc(fi)) %>%
mutate(
FuelType = fct_reorder(FuelType, hi_pct),
etiqueta = paste0(round(hi_pct, 1), "%")
)
ggplot(datos_pie, aes(x = "", y = hi_pct, fill = FuelType)) +
geom_col(width = 1, color = "white", linewidth = 0.6) +
geom_text(
aes(label = etiqueta),
position = position_stack(vjust = 0.5),
size = 4, color = "#1A1A1A", fontface = "bold"
) +
coord_polar(theta = "y", start = 0) +
scale_fill_manual(values = colores_fuel) +
labs(
title = "Gráfica N. 3: Distribución Porcentual por Tipo de Combustible",
fill = "Fuel Type",
caption = paste0("n = ", format(n, big.mark = ","),
" | Fuente: Global Energy Monitor — GOGET 2023")
) +
theme_void(base_size = 12) +
theme(
plot.title = element_text(face = "bold", color = "#1A1A1A",
size = 13, hjust = 0.5),
plot.caption = element_text(color = "#888888", size = 9, hjust = 0.5),
legend.position = "right",
legend.title = element_text(face = "bold", color = "#1A1A1A", size = 10),
legend.text = element_text(size = 9, color = "#333333"),
plot.background = element_rect(fill = "white", color = NA)
)# Moda
moda_fuel <- tabla_freq$FuelType[which.max(tabla_freq$fi)]
# Tabla de indicadores
tabla_indicadores <- data.frame(
"Variable" = "Fuel Type",
"Rango" = "Tipos de combustible",
"Media (X)" = "-",
"Mediana (Me)" = "-",
"Moda (Mo)" = moda_fuel,
"Varianza (V)" = "-",
"Desv. Est. (Sd)" = "-",
"C.V. (%)" = "-",
"Asimetría (As)" = "-",
"Curtosis (K)" = "-",
check.names = FALSE
)tabla_indicadores %>%
gt() %>%
tab_header(
title = md("**Tabla N°3 de Conclusiones — Tipo de Combustible en yacimientos de petróleo y gas**")
) %>%
tab_source_note(source_note = "Autor: Grupo 5") %>%
tab_options(
table.width = pct(95),
table.font.size = px(13),
table.font.names = "Arial",
heading.title.font.size = px(15),
heading.align = "center",
heading.background.color = "#AAAAAA",
heading.border.bottom.color = "#AAAAAA",
heading.border.bottom.width = px(1),
column_labels.font.weight = "bold",
column_labels.background.color = "#F0F0F0",
column_labels.border.top.color = "#AAAAAA",
column_labels.border.top.width = px(1),
column_labels.border.bottom.color = "#AAAAAA",
column_labels.border.bottom.width = px(1),
row.striping.include_table_body = FALSE,
source_notes.font.size = px(11),
source_notes.border.lr.color = "transparent",
table.border.top.color = "#AAAAAA",
table.border.top.width = px(1),
table.border.bottom.color = "#AAAAAA",
table.border.bottom.width = px(2)
) %>%
tab_style(
style = cell_text(color = "white", weight = "bold"),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = cell_text(color = "#000000", weight = "bold"),
locations = cells_column_labels()
) %>%
tab_style(
style = cell_text(color = "#AAAAAA"),
locations = cells_body()
)| Tabla N°3 de Conclusiones — Tipo de Combustible en yacimientos de petróleo y gas | |||||||||
| Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) |
|---|---|---|---|---|---|---|---|---|---|
| Fuel Type | Tipos de combustible | - | - | oil and gas | - | - | - | - | - |
| Autor: Grupo 5 | |||||||||