1 Introducción y Metodología

El presente informe estadístico analiza la variable Profundidad de Perforación de pozos petroleros de Brasil.

# 1. LIBRERÍAS Y CARGA DE DATOS

if(!require(readxl)) install.packages("readxl")
if(!require(dplyr)) install.packages("dplyr")
if(!require(gt)) install.packages("gt")
if(!require(e1071)) install.packages("e1071")

library(readxl)
library(dplyr)
library(gt)
library(e1071)

# Carga de datos
Datos_Brutos <- read_excel("tabela_de_pocos_janeiro_2018.xlsx", sheet = 1)

vars_to_keep <- c("POCO", "OPERADOR", "ESTADO", "BACIA", "DIRECAO", 
                  "TIPO", "SITUACAO", "PROFUNDIDADE_SONDADOR_M", "LAMINA_D_AGUA_M")

Datos <- Datos_Brutos %>%
  select(any_of(vars_to_keep)) %>%
  mutate(temp_var = as.character(PROFUNDIDADE_SONDADOR_M)) %>%
  mutate(temp_var = gsub("\\.", "", temp_var)) %>%
  mutate(temp_var = gsub(",", ".", temp_var)) %>%
  mutate(PROFUNDIDADE_SONDADOR_M = as.numeric(temp_var))
Variable <- na.omit(Datos$PROFUNDIDADE_SONDADOR_M)
Variable <- Variable[Variable > 0] 


# 2. CÁLCULOS MATEMÁTICOS (STURGES)

N <- length(Variable)
if(N == 0) stop("ERROR CRÍTICO: No hay datos válidos. Revise la limpieza de datos.")

min_val <- min(Variable)
max_val <- max(Variable)
Rango <- max_val - min_val
K <- floor(1 + 3.322 * log10(N)) 
Amplitud <- Rango / K

breaks_raw <- seq(min_val, max_val, length.out = K + 1)
breaks_raw[length(breaks_raw)] <- max_val + 0.0001 

lim_inf_raw <- breaks_raw[1:K]
lim_sup_raw <- breaks_raw[2:(K+1)]
MC <- (lim_inf_raw + lim_sup_raw) / 2

# Cálculo de Frecuencias
ni <- numeric(K)
for (i in 1:K) {
  if (i < K) {
    ni[i] <- length(Variable[Variable >= lim_inf_raw[i] & Variable < lim_sup_raw[i]])
  } else {
    ni[i] <- length(Variable[Variable >= lim_inf_raw[i] & Variable <= lim_sup_raw[i]])
  }
}

hi <- (ni / sum(ni)) * 100 
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))

# Dataframe Visualización
TDF_Profundidad <- data.frame(
  Li = round(lim_inf_raw, 2), 
  Ls = round(lim_sup_raw, 2), 
  MC = round(MC, 2),            
  ni = ni, 
  hi = round(hi, 2),
  Ni_asc = Ni_asc, 
  Ni_desc = Ni_desc, 
  Hi_asc = round(Hi_asc, 2), 
  Hi_desc = round(Hi_desc, 2)
)

2 Distribución de Frecuencias

A continuación se presenta la tabla de distribución de frecuencias obtenida.

# Totales
totales <- c("TOTAL", "-", "-", sum(ni), round(sum(hi), 2), "-", "-", "-", "-")
TDF_Char <- TDF_Profundidad %>% mutate(across(everything(), as.character))
TDF_Final <- rbind(TDF_Char, totales)

TDF_Final %>%
  gt() %>%
  tab_header(
    title = md("**DISTRIBUCIÓN DE FRECUENCIAS**"),
    subtitle = md("Variable: **Profundidad de Perforación (m)**")
  ) %>%
  tab_source_note(source_note = "Fuente: Datos ANP 2018") %>%
  cols_label(
    Li = "Lím. Inf", Ls = "Lím. Sup", MC = "Marca Clase (Xi)", 
    ni = "ni", hi = "hi (%)", 
    Ni_asc = "Ni (Asc)", Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi (Asc)", Hi_desc = "Hi (Desc)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_style(
    style = list(cell_fill(color = "#2E4053"), cell_text(color = "white", weight = "bold")),
    locations = cells_title()
  ) %>%
  tab_style(
    style = list(cell_fill(color = "#F2F3F4"), cell_text(weight = "bold", color = "#2E4053")),
    locations = cells_column_labels()
  ) %>%
  tab_options(
    table.border.top.color = "#2E4053",
    table.border.bottom.color = "#2E4053",
    column_labels.border.bottom.color = "#2E4053",
    data_row.padding = px(6)
  )
DISTRIBUCIÓN DE FRECUENCIAS
Variable: Profundidad de Perforación (m)
Lím. Inf Lím. Sup Marca Clase (Xi) ni hi (%) Ni (Asc) Ni (Desc) Hi (Asc) Hi (Desc)
5 47175.53 23590.27 26256 99.79 26256 26312 99.79 100
47175.53 94346.07 70760.8 30 0.11 26286 56 99.9 0.21
94346.07 141516.6 117931.33 0 0 26286 26 99.9 0.1
141516.6 188687.13 165101.87 0 0 26286 26 99.9 0.1
188687.13 235857.67 212272.4 1 0 26287 26 99.9 0.1
235857.67 283028.2 259442.93 1 0 26288 25 99.91 0.1
283028.2 330198.73 306613.47 5 0.02 26293 24 99.93 0.09
330198.73 377369.27 353784 6 0.02 26299 19 99.95 0.07
377369.27 424539.8 400954.53 6 0.02 26305 13 99.97 0.05
424539.8 471710.33 448125.07 2 0.01 26307 7 99.98 0.03
471710.33 518880.87 495295.6 2 0.01 26309 5 99.99 0.02
518880.87 566051.4 542466.13 2 0.01 26311 3 100 0.01
566051.4 613221.93 589636.67 0 0 26311 1 100 0
613221.93 660392.47 636807.2 0 0 26311 1 100 0
660392.47 707563 683977.73 1 0 26312 1 100 0
TOTAL - - 26312 100 - - - -
Fuente: Datos ANP 2018

3 Análisis Gráfico

Esta sección presenta la visualización de la distribución de los datos.

3.1 Histogramas de Frecuencia

col_gris_azulado <- "#5D6D7E"
col_ejes <- "#2E4053"
h_base <- hist(Variable, breaks = breaks_raw, right = FALSE, plot = FALSE)
ylim_max <- if(max(ni) > 0) max(ni) * 1.1 else 10

# GRÁFICO 1: Histograma Absoluto (Local)
par(mar = c(8, 5, 4, 2)) 
plot(h_base, 
     main = "Gráfica No.1: Distribución de Profundidad de Perforación (Local)",
     xlab = "Profundidad de Perforación (m)",
     ylab = "Frecuencia Absoluta",
     col = col_gris_azulado, border = "white", axes = FALSE,  
     ylim = c(0, ylim_max))
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
grid(nx=NA, ny=NULL, col="#D7DBDD", lty="dotted") 

# GRÁFICO 2: Histograma Global
par(mar = c(8, 5, 4, 2))
plot(h_base, 
     main = "Gráfica N°2: Distribución de Profundidad de Perforación (Global)",
     xlab = "Profundidad de Perforación (m)",
     ylab = "Total Pozos",
     col = col_gris_azulado, border = "white", axes = FALSE, 
     ylim = c(0, sum(ni))) 
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
grid(nx=NA, ny=NULL, col="#D7DBDD", lty="dotted")

3.2 Gráficos Porcentuales

h_porc <- h_base
h_porc$counts <- hi
h_porc$density <- hi
ylim_porc <- if(max(hi) > 0) max(hi) * 1.2 else 100

# GRÁFICO 3: Porcentajes (Local)
par(mar = c(8, 5, 4, 2))
plot(h_porc,
     main = "Gráfica N°3: Distribución Porcentual de Profundidad de Perforación (Local)",
     xlab = " Profundidad de Perforación (m)",
     ylab = "Porcentaje (%)",
     col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
     ylim = c(0, ylim_porc))
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
text(x = h_base$mids, y = hi, label = paste0(round(hi, 1), "%"), pos = 3, cex = 0.6, col = col_ejes)
grid(nx=NA, ny=NULL, col="#D7DBDD", lty="dotted") 

# GRÁFICO 4: Global Porcentual
par(mar = c(8, 5, 4, 2))
plot(h_porc,
     main = "Gráfica No.4: Distribución Porcentual de Profundidad de Perforación (Global)",
     xlab = "Profundidad de Perforación (m)",
     ylab = "% del Total", 
     col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
     ylim = c(0, 100))
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
abline(h=seq(0,100,20), col="#D7DBDD", lty="dotted")

3.3 Diagrama de Caja y Ojivas

# GRÁFICO 5: Boxplot 
par(mar = c(5, 5, 4, 2))
boxplot(Variable, horizontal = TRUE, col = col_gris_azulado, 
        main = "Gráfica No.5: Diagrama de Caja de Profundidad de Perforación (Boxplot)",
        xlab = "Profundidad (m)", outline = TRUE, outpch = 19, outcol = "#C0392B", 
        boxwex = 0.5, frame.plot = FALSE, xaxt = "n") 
eje_x_detallado <- pretty(Variable, n = 20) 
axis(1, at = eje_x_detallado, labels = format(eje_x_detallado, scientific = FALSE), cex.axis=0.7, las=2)
grid(nx=NULL, ny=NA, col="lightgray", lty="dotted")

# GRÁFICO 6: Ojivas 
par(mar = c(5, 5, 4, 8), xpd = TRUE) 
x_asc <- c(min(breaks_raw), breaks_raw[2:length(breaks_raw)])
y_asc <- c(0, Ni_asc)
x_desc <- c(breaks_raw[1:(length(breaks_raw)-1)], max(breaks_raw))
y_desc <- c(Ni_desc, 0)
if(length(x_desc) != length(y_desc)) {
  x_desc <- c(TDF_Profundidad$Li, max(TDF_Profundidad$Ls))
  y_desc <- c(TDF_Profundidad$Ni_desc, 0)
}
x_range <- range(c(x_asc, x_desc))
y_range <- c(0, max(c(y_asc, y_desc)))
col_azul <- "#2E4053"
col_rojo <- "#C0392B"

plot(x_asc, y_asc, type = "o", col = col_azul, lwd=2, pch=19,
     main = "Gráfica No.6: Ojivas Ascendente y Descendente de Profundidad de Perforación",
     xlab = "Profundidad (m)", ylab = "Frecuencia acumulada",
     xlim = x_range, ylim = y_range, axes = FALSE, frame.plot = FALSE)
axis(1, at = round(breaks_raw,0), labels = format(round(breaks_raw,0), scientific = FALSE), las=2, cex.axis=0.6)
axis(2, at = pretty(y_asc), labels = format(pretty(y_asc), scientific = FALSE))
lines(x_desc, y_desc, type = "o", col = col_rojo, lwd=2, pch=19)
legend("right", legend = c("Ascendente", "Descendente"),
       col = c(col_azul, col_rojo), lty = 1, pch = 19, cex = 0.7, lwd=2,
       inset = c(-0.3, 0), bty="n")
grid()

4 Resumen Estadístico

tabla_ind <- data.frame(
  Variable = "Profundidad de Perforación (m)", 
  Media = round(mean(Variable), 2), 
  Mediana = round(median(Variable), 2),
  Moda = paste(round(TDF_Profundidad$MC[TDF_Profundidad$ni == max(TDF_Profundidad$ni)], 2), collapse=", "),
  Varianza = round(var(Variable), 2), 
  Desv_Est = round(sd(Variable), 2), 
  CV_Porc = round((sd(Variable)/mean(Variable))*100, 2),
  Asimetria = round(skewness(Variable), 4), 
  Curtosis = round(kurtosis(Variable), 2)
)

gt(tabla_ind) %>%
  tab_header(
    title = md("**RESUMEN ESTADÍSTICO**"),
    subtitle = md("Variable: **Profundidad de Perforación (m)**") 
  ) %>%
  fmt_number(columns = 2:9, decimals = 2) %>%
  tab_options(
    column_labels.background.color = "#2E4053",
    table.border.top.color = "black",
    table.border.bottom.color = "#2E4053",
    column_labels.border.bottom.color = "#2E4053",
    data_row.padding = px(6)
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold", color = "white")),
    locations = cells_column_labels()
  )
RESUMEN ESTADÍSTICO
Variable: Profundidad de Perforación (m)
Variable Media Mediana Moda Varianza Desv_Est CV_Porc Asimetria Curtosis
Profundidad de Perforación (m) 2,960.63 1,265.00 23590.27 179,929,355.11 13,413.77 453.07 29.23 1,031.66