Latitud Base de los Pozos Petroleros

1 Introducción y Metodología

# 1. LIBRERÍAS Y CARGA DE DATOS
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

#
# Load the raw well table. The file is addressed with file.path() instead of
# setwd(), so the working directory of the session is left untouched.
dir_datos <- "C:/Users/Usuario/Desktop/TRABAJO DE ESTADISTICA/PDF-EXCEL-QGIS"
ruta_datos <- file.path(dir_datos, "Pozos Brasil 2.csv")

# Fail early with a readable message when the CSV is not where it is expected.
if (!file.exists(ruta_datos)) {
  stop("No se encontró el archivo de datos: ", ruta_datos, call. = FALSE)
}

# Semicolon-separated file, "." as decimal mark, read as Latin-1.
Datos_Brutos <- read.csv(
  ruta_datos,
  header = TRUE, sep = ";", dec = ".", fileEncoding = "LATIN1"
)

# Strip leading/trailing whitespace from the column names, then list them.
colnames(Datos_Brutos) <- trimws(colnames(Datos_Brutos))
colnames(Datos_Brutos)
##  [1] "POCO"                       "CADASTRO"                  
##  [3] "OPERADOR"                   "POCO_OPERADOR"             
##  [5] "ESTADO"                     "BACIA"                     
##  [7] "BLOCO"                      "SIG_CAMPO"                 
##  [9] "CAMPO"                      "TERRA_MAR"                 
## [11] "POCO_POS_ANP"               "TIPO"                      
## [13] "CATEGORIA"                  "RECLASSIFICACAO"           
## [15] "SITUACAO"                   "INICIO"                    
## [17] "TERMINO"                    "CONCLUSAO"                 
## [19] "TITULARIDADE"               "LATITUDE_BASE_4C"          
## [21] "LONGITUDE_BASE_4C"          "LATITUDE_BASE_DD"          
## [23] "LONGITUDE_BASE_DD"          "DATUM_HORIZONTAL"          
## [25] "TIPO_DE_COORDENADA_DE_BASE" "DIRECAO"                   
## [27] "PROFUNDIDADE_VERTICAL_M"    "PROFUNDIDADE_SONDADOR_M"   
## [29] "PROFUNDIDADE_MEDIDA_M"      "REFERENCIA_DE_PROFUNDIDADE"
## [31] "MESA_ROTATIVA"              "COTA_ALTIMETRICA_M"        
## [33] "LAMINA_D_AGUA_M"            "DATUM_VERTICAL"            
## [35] "UNIDADE_ESTRATIGRAFICA"     "GEOLOGIA_GRUPO_FINAL"      
## [37] "GEOLOGIA_FORMACAO_FINAL"    "GEOLOGIA_MEMBRO_FINAL"     
## [39] "CDPE"                       "AGP"                       
## [41] "PC"                         "PAG"                       
## [43] "PERFIS_CONVENCIONAIS"       "DURANTE_PERFURACAO"        
## [45] "PERFIS_DIGITAIS"            "PERFIS_PROCESSADOS"        
## [47] "PERFIS_ESPECIAIS"           "AMOSTRA_LATERAL"           
## [49] "SISMICA"                    "TABELA_TEMPO_PROFUNDIDADE" 
## [51] "DADOS_DIRECIONAIS"          "TESTE_A_CABO"              
## [53] "TESTE_DE_FORMACAO"          "CANHONEIO"                 
## [55] "TESTEMUNHO"                 "GEOQUIMICA"                
## [57] "SIG_SONDA"                  "NOM_SONDA"                 
## [59] "DHA_ATUALIZACAO"
# Keep the well identifier and the decimal-degree base latitude, then add a
# numeric copy of the latitude (any decimal comma is turned into a point
# before the conversion).
columnas_utiles <- c("POCO", "LATITUDE_BASE_DD")
Datos <- select(Datos_Brutos, any_of(columnas_utiles))
Datos <- mutate(
  Datos,
  Variable_Analisis = as.numeric(gsub(",", ".", LATITUDE_BASE_DD))
)

# Analysis vector: non-missing latitudes strictly between -35 and 10.
latitudes <- Datos$Variable_Analisis
es_valida <- !is.na(latitudes) & latitudes > -35 & latitudes < 10
Variable <- latitudes[es_valida]

head(Variable)
## [1] -21.96268 -23.10267 -21.13522 -21.13682 -21.13994 -12.13336
# 2. CALCULATIONS FOR THE FREQUENCY TABLE

# Every statistic below needs at least one observation; min()/max()/seq()
# would otherwise fail with far less helpful messages.
stopifnot(length(Variable) > 0)

N <- length(Variable)
min_val <- min(Variable)
max_val <- max(Variable)
Rango <- max_val - min_val
# Number of classes from Sturges' formula, rounded down.
K <- floor(1 + 3.322 * log10(N))
Amplitud <- Rango / K

# K equal-width classes spanning exactly [min_val, max_val]. The last break is
# NOT padded: the last class is closed on the right (see the counting step),
# so the reported upper limit and class mark stay equal to the real values.
breaks_table <- seq(min_val, max_val, length.out = K + 1)

lim_inf_table <- breaks_table[seq_len(K)]
lim_sup_table <- breaks_table[-1]
MC <- (lim_inf_table + lim_sup_table) / 2

# Absolute frequencies. Classes are [Li, Ls) except the last one, which is
# [Li, Ls] (rightmost.closed = TRUE) so the maximum is counted.
clase <- findInterval(Variable, breaks_table, rightmost.closed = TRUE)
ni <- tabulate(clase, nbins = K)

# Relative frequency in percent, plus ascending/descending cumulatives.
hi <- (ni / sum(ni)) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))

# Frequency distribution table; limits and class marks rounded to 4 decimals,
# percentages to 2.
TDF_Latitud <- data.frame(
  Li = round(lim_inf_table, 4),
  Ls = round(lim_sup_table, 4),
  MC = round(MC, 4),
  ni = ni,
  hi = round(hi, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc, 2),
  Hi_desc = round(Hi_desc, 2)
)

2 Distribución de Frecuencias

# Totals row: only ni and hi get a total; every other cell shows "-".
guion <- "-"
totales <- c("TOTAL", guion, guion, sum(ni), round(sum(hi), 2), rep(guion, 4))

# Character copy of the table so the totals row can be appended below it.
TDF_Char <- TDF_Latitud
TDF_Char[] <- lapply(TDF_Latitud, as.character)
TDF_Final <- rbind(TDF_Char, totales)

# Colours and cell styles used by the table.
color_oscuro <- "#2E4053"
color_claro <- "#F2F3F4"
estilo_titulo <- list(
  cell_fill(color = color_oscuro),
  cell_text(color = "white", weight = "bold")
)
estilo_columnas <- list(
  cell_fill(color = color_claro),
  cell_text(weight = "bold", color = color_oscuro)
)

# Build the gt table step by step; the last line prints it.
tabla_tdf <- gt(TDF_Final)
tabla_tdf <- tab_header(
  tabla_tdf,
  title = md("**DISTRIBUCIÓN DE FRECUENCIAS DE POZOS PETROLEROS DE BRASIL**"),
  subtitle = md("**Variable: LATITUD BASE**")
)
tabla_tdf <- tab_source_note(tabla_tdf, source_note = "Fuente: Datos ANP 2018")
tabla_tdf <- cols_label(
  tabla_tdf,
  Li = "Lim Inf",
  Ls = "Lim Sup",
  MC = "Marca Clase (Xi)",
  ni = "ni",
  hi = "hi (%)",
  Ni_asc = "Ni (Asc)",
  Ni_desc = "Ni (Desc)",
  Hi_asc = "Hi (Asc)",
  Hi_desc = "Hi (Desc)"
)
tabla_tdf <- cols_align(tabla_tdf, align = "center", columns = everything())
tabla_tdf <- tab_style(
  tabla_tdf,
  style = estilo_titulo,
  locations = cells_title()
)
tabla_tdf <- tab_style(
  tabla_tdf,
  style = estilo_columnas,
  locations = cells_column_labels()
)
tabla_tdf <- tab_options(
  tabla_tdf,
  table.border.top.color = color_oscuro,
  table.border.bottom.color = color_oscuro,
  column_labels.border.bottom.color = color_oscuro,
  data_row.padding = px(6)
)
tabla_tdf
DISTRIBUCIÓN DE FRECUENCIAS DE POZOS PETROLEROS DE BRASIL
Variable: LATITUD BASE
Lim Inf Lim Sup Marca Clase (Xi) ni hi (%) Ni (Asc) Ni (Desc) Hi (Asc) Hi (Desc)
-32.9266 -30.4296 -31.6781 12 0.04 12 29575 0.04 100
-30.4296 -27.9326 -29.1811 18 0.06 30 29563 0.1 99.96
-27.9326 -25.4357 -26.6841 303 1.02 333 29545 1.13 99.9
-25.4357 -22.9387 -24.1872 877 2.97 1210 29242 4.09 98.87
-22.9387 -20.4417 -21.6902 3079 10.41 4289 28365 14.5 95.91
-20.4417 -17.9447 -19.1932 2029 6.86 6318 25286 21.36 85.5
-17.9447 -15.4478 -16.6963 135 0.46 6453 23257 21.82 78.64
-15.4478 -12.9508 -14.1993 163 0.55 6616 23122 22.37 78.18
-12.9508 -10.4538 -11.7023 11444 38.69 18060 22959 61.07 77.63
-10.4538 -7.9569 -9.2053 1051 3.55 19111 11515 64.62 38.93
-7.9569 -5.4599 -6.7084 611 2.07 19722 10464 66.68 35.38
-5.4599 -2.9629 -4.2114 9444 31.93 29166 9853 98.62 33.32
-2.9629 -0.4659 -1.7144 274 0.93 29440 409 99.54 1.38
-0.4659 2.031 0.7825 52 0.18 29492 135 99.72 0.46
2.031 4.5281 3.2796 83 0.28 29575 83 100 0.28
TOTAL - - 29575 100 - - - -
Fuente: Datos ANP 2018

3 Análisis Gráfico

3.1 Histogramas de Frecuencia

# Shared colours and the histogram object; breaks are chosen by hist() with
# its "Sturges" rule and nothing is drawn yet (plot = FALSE).
col_gris_azulado <- "#5D6D7E"
col_ejes <- "#2E4053"
h_base <- hist(Variable, breaks = "Sturges", plot = FALSE)

# x-axis ticks: histogram breaks rounded to whole units, labelled without
# scientific notation.
marcas_x <- round(h_base$breaks, 0)
etiquetas_x <- format(marcas_x, scientific = FALSE)

# PLOT 1: absolute-frequency histogram, y axis scaled to the tallest bar
# plus 10 % headroom ("local" scale).
tope_local <- max(h_base$counts) * 1.1
par(mar = c(8, 5, 4, 2))
plot(
  h_base,
  main = "Gráfica No.1: Distribución de Latitud Base",
  xlab = "Latitud Base",
  ylab = "Frecuencia Absoluta",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  ylim = c(0, tope_local)
)
axis(1, at = marcas_x, labels = etiquetas_x, las = 2, cex.axis = 0.7)
axis(2)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")

# PLOT 2: same histogram, y axis running up to the total number of
# observations ("global" scale).
tope_global <- sum(h_base$counts)
par(mar = c(8, 5, 4, 2))
plot(
  h_base,
  main = "Gráfica N°2: Distribución de Latitud Base",
  xlab = "Latitud Base",
  ylab = "Total Pozos",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  ylim = c(0, tope_global)
)
axis(1, at = marcas_x, labels = etiquetas_x, las = 2, cex.axis = 0.7)
axis(2)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")

3.2 Gráficos Porcentuales

# Percentage version of the histogram: counts (and density) are replaced by
# each class's share of the total, in percent.
porcentajes <- (h_base$counts / sum(h_base$counts)) * 100
h_porc <- h_base
h_porc$counts <- porcentajes
h_porc$density <- porcentajes

# Tick positions/labels for the x axis and the "%" label placed over each bar.
pos_marcas <- round(h_base$breaks, 0)
txt_marcas <- format(pos_marcas, scientific = FALSE)
txt_barras <- paste0(round(porcentajes, 1), "%")

# PLOT 3: percentages, y axis scaled to the tallest bar plus 20 % headroom.
par(mar = c(8, 5, 4, 2))
plot(
  h_porc,
  main = "Gráfica N°3: Distribución Porcentual de Latitud Base",
  xlab = "Latitud Base",
  ylab = "Porcentaje (%)",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  freq = TRUE,
  ylim = c(0, max(porcentajes) * 1.2)
)
axis(1, at = pos_marcas, labels = txt_marcas, las = 2, cex.axis = 0.7)
axis(2)
text(
  x = h_base$mids, y = porcentajes, labels = txt_barras,
  pos = 3, cex = 0.6, col = col_ejes
)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")

# PLOT 4: percentages on a fixed 0-100 scale with guide lines every 20 points.
par(mar = c(8, 5, 4, 2))
plot(
  h_porc,
  main = "Gráfica No.4: Distribución Porcentual de Latitud Base",
  xlab = "Latitud Base",
  ylab = "% del Total",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  freq = TRUE,
  ylim = c(0, 100)
)
axis(1, at = pos_marcas, labels = txt_marcas, las = 2, cex.axis = 0.7)
text(
  x = h_base$mids, y = porcentajes, labels = txt_barras,
  pos = 3, cex = 0.6, col = col_ejes
)
axis(2)
abline(h = seq(0, 100, 20), col = "#D7DBDD", lty = "dotted")

3.3 Diagrama de Caja y Ojivas

# PLOT 5: horizontal boxplot of the base latitude.
# The x-axis label names the variable actually plotted (base latitude).
par(mar = c(5, 5, 4, 2))
boxplot(
  Variable,
  horizontal = TRUE,
  col = col_gris_azulado,
  main = "Gráfica No.5: Diagrama de Caja de Latitud Base (Boxplot)",
  xlab = "Latitud Base",
  outline = TRUE, outpch = 19, outcol = "#C0392B",
  boxwex = 0.5, frame.plot = FALSE,
  xaxt = "n" # default x axis suppressed; a denser one is drawn below
)

# Custom x axis with roughly 20 "pretty" tick positions, labels rotated.
eje_x_detallado <- pretty(Variable, n = 20)
axis(
  1,
  at = eje_x_detallado,
  labels = format(eje_x_detallado, scientific = FALSE),
  cex.axis = 0.7, las = 2
)
grid(nx = NULL, ny = NA, col = "lightgray", lty = "dotted")

# PLOT 6: ascending and descending ogives (cumulative absolute frequency).
# A wide right margin leaves room for the legend. Clipping is NOT relaxed
# globally: with par(xpd = TRUE) the grid lines would run into the margins,
# so xpd is passed to legend() only. Previous settings are restored at the end.
par_previo <- par(mar = c(5, 5, 4, 8))

# Ascending curve: 0 at the first class limit, then the cumulative count at
# every following limit.
x_asc <- breaks_table
y_asc <- c(0, Ni_asc)

# Descending curve: descending cumulative count at every class limit but the
# last, and 0 at the last limit.
x_desc <- breaks_table
y_desc <- c(Ni_desc, 0)

x_range <- range(c(x_asc, x_desc))
y_range <- c(0, max(c(y_asc, y_desc)))
col_azul <- "#2E4053"
col_rojo <- "#C0392B"

# Empty canvas first, so the grid ends up behind both curves.
plot(x_asc, y_asc, type = "n",
     main = "Gráfica No.6: Ojivas Ascendente y Descendente de Latitud Base",
     xlab = "Latitud Base", ylab = "Frecuencia acumulada",
     xlim = x_range, ylim = y_range, axes = FALSE, frame.plot = FALSE)

# x ticks at the class limits rounded to whole units; y ticks at pretty values.
marcas_ojiva <- round(breaks_table, 0)
axis(1, at = marcas_ojiva, labels = format(marcas_ojiva, scientific = FALSE),
     las = 2, cex.axis = 0.6)
axis(2, at = pretty(y_asc), labels = format(pretty(y_asc), scientific = FALSE))
grid()

lines(x_asc, y_asc, type = "o", col = col_azul, lwd = 2, pch = 19)
lines(x_desc, y_desc, type = "o", col = col_rojo, lwd = 2, pch = 19)

# The negative inset pushes the legend into the right margin, hence xpd = TRUE
# for this call only.
legend("right", legend = c("Ascendente", "Descendente"),
       col = c(col_azul, col_rojo), lty = 1, pch = 19, cex = 0.7, lwd = 2,
       inset = c(-0.15, 0), bty = "n", xpd = TRUE)

par(par_previo)

4 Resumen Estadístico

# SUMMARY INDICATORS

# Central tendency.
media_val <- mean(Variable)
mediana_val <- median(Variable)

# Mode taken from the grouped data: class mark(s) of the class(es) with the
# highest absolute frequency, joined into one string if there are ties.
freq_max <- max(TDF_Latitud$ni)
modas_calc <- TDF_Latitud$MC[TDF_Latitud$ni == freq_max]
moda_txt <- paste(round(modas_calc, 2), collapse = ", ")

# Dispersion. The coefficient of variation uses |mean| so it stays positive
# even though the mean can be negative.
rango_txt <- paste0(
  "[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"
)
varianza_val <- var(Variable)
sd_val <- sd(Variable)
cv_val <- (sd_val / abs(media_val)) * 100

# Shape (e1071 estimators, type = 2).
asimetria_val <- skewness(Variable, type = 2)
curtosis_val <- kurtosis(Variable, type = 2)

# Outliers as reported by boxplot.stats(); shown as "count [min; max]".
vals_atipicos <- boxplot.stats(Variable)$out
num_atipicos <- length(vals_atipicos)

status_atipicos <- if (num_atipicos > 0) {
  min_out <- min(vals_atipicos)
  max_out <- max(vals_atipicos)
  paste0(num_atipicos, " [", round(min_out, 2), "; ", round(max_out, 2), "]")
} else {
  "0 (Sin atípicos)"
}

# One-row summary table. The label names the variable actually analysed
# (base latitude).
df_resumen <- data.frame(
  "Variable" = "Latitud Base",
  "Rango" = rango_txt,
  "Media" = media_val,
  "Mediana" = mediana_val,
  "Moda" = moda_txt,
  "Varianza" = varianza_val,
  "Desv_Std" = sd_val,
  "CV_Porc" = cv_val,
  "Asimetria" = asimetria_val,
  "Curtosis" = curtosis_val,
  "Atipicos" = status_atipicos
)

# Render the summary indicators as a gt table. The subtitle names the variable
# actually summarised (base latitude).
df_resumen %>%
  gt() %>%
  tab_header(
    title = md("**CONCLUSIONES Y ESTADÍSTICOS**"),
    subtitle = paste(
      "Resumen de Indicadores de Latitud Base",
      "de los Pozos Petrolíferos en Brasil"
    )
  ) %>%
  tab_source_note(source_note = "Autor: Grupo 3") %>%
  # Two decimals for most indicators, four for skewness.
  fmt_number(
    columns = c(Media, Mediana, Varianza, Desv_Std, CV_Porc, Curtosis),
    decimals = 2
  ) %>%
  fmt_number(columns = c(Asimetria), decimals = 4) %>%
  cols_label(
    Variable = "Variable",
    Rango = "Rango Total",
    Media = "Media (X̄)",
    Mediana = "Mediana (Me)",
    Moda = "Moda (Mo)",
    Varianza = "Varianza (S²)",
    Desv_Std = "Desv. Est. (S)",
    CV_Porc = "C.V. (%)",
    Asimetria = "Asimetría (As)",
    Curtosis = "Curtosis (K)",
    Atipicos = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    column_labels.background.color = "#2E4053",
    table.border.top.color = "black",
    table.border.bottom.color = "#2E4053",
    column_labels.border.bottom.color = "#2E4053",
    data_row.padding = px(8)
  ) %>%
  # White bold text on the dark column-label background.
  tab_style(
    style = list(cell_text(weight = "bold", color = "white")),
    locations = cells_column_labels()
  )
CONCLUSIONES Y ESTADÍSTICOS
Resumen de Indicadores de Latitud Base de los Pozos Petrolíferos en Brasil
Variable Rango Total Media (X̄) Mediana (Me) Moda (Mo) Varianza (S²) Desv. Est. (S) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Latitud Base [-32.93; 4.53] −11.41 −10.71 -11.7 39.50 6.28 55.06 −0.6504 −0.51 810 [-32.93; -23.71]
Autor: Grupo 3

5 Conclusión

La variable Latitud Base fluctúa entre –32.93 y 4.53, y sus valores se encuentran en torno a –11 (media = –11.41; mediana = –10.71), con una desviación estándar de 6.28. Se trata de un conjunto de datos altamente heterogéneo (CV = 55.06 %). La asimetría negativa (–0.6504) indica que los valores se concentran hacia las latitudes más altas del rango (menos negativas), mientras que la cola de la distribución se extiende hacia las latitudes más bajas, donde se presentan los valores atípicos (desde –32.93 hasta –23.71). Por lo tanto, la variable se comporta de forma dispersa y con asimetría negativa.