# Global chunk option for the knitted report: echo the source of every chunk.
knitr::opts_chunk$set(echo = TRUE)

# Folder holding the raw ANP well tables. The path is built explicitly
# instead of calling setwd(), so the working directory is left untouched.
dir_datos <- "C:/Users/LEO/Documents/ESTA"

# The export is comma-separated UTF-8 with a byte-order mark. Reading it with
# sep = ";" and Latin1 collapsed every field into a single column, prefixed
# the first header with the BOM bytes and garbled accented characters.
Datos <- read.csv(
  file.path(dir_datos, "tabela_de_pocos_janeiro_2018.csv"),
  header = TRUE,
  sep = ",",
  dec = ".",
  fileEncoding = "UTF-8-BOM"
)
# Quick structural check of the imported table.
str(Datos)

## 'data.frame':    29575 obs. of  1 variable:
##  $ ï..POCO.CADASTRO.OPERADOR.POCO_OPERADOR.ESTADO.BACIA.BLOCO.SIG_CAMPO.CAMPO.TERRA_MAR.POCO_POS_ANP.TIPO.CATEGORIA.RECLASSIFICACAO.SITUACAO.INICIO.TERMINO.CONCLUSAO.TITULARIDADE.LATITUDE_BASE_4C.LONGITUDE_BASE_4C.LATITUDE_BASE_DD.LONGITUDE_BASE_DD.DATUM_HORIZONTAL.TIPO_DE_COORDENADA_DE_BASE.DIRECAO.PROFUNDIDADE_VERTICAL_M.PROFUNDIDADE_SONDADOR_M.PROFUNDIDADE_MEDIDA_M.REFERENCIA_DE_PROFUNDIDADE.MESA_ROTATIVA.COTA_ALTIMETRICA_M.LAMINA_D_AGUA_M.DATUM_VERTICAL.UNIDADE_ESTRATIGRAFICA.GEOLOGIA_GRUPO_FINAL.GEOLOGIA_FORMACAO_FINAL.GEOLOGIA_MEMBRO_FINAL.CDPE.AGP.PC.PAG.PERFIS_CONVENCIONAIS.DURANTE_PERFURACAO.PERFIS_DIGITAIS.PERFIS_PROCESSADOS.PERFIS_ESPECIAIS.AMOSTRA_LATERAL.SISMICA.TABELA_TEMPO_PROFUNDIDADE.DADOS_DIRECIONAIS.TESTE_A_CABO.TESTE_DE_FORMACAO.CANHONEIO.TESTEMUNHO.GEOQUIMICA.SIG_SONDA.NOM_SONDA.DHA_ATUALIZACAO: chr  "7-RO-123HP-RJS,74281026087,Petrobras,7RO123HPRJS,RJ,Campos,,RO   ,RONCADOR,M,S,ExplotatÃ³rio,Desenvolvimento,PR"| __truncated__ "1-BP-7-RJS,74281026107,BP Energy,ANU,RJ,Campos,C-M-473,,,M,S,ExploratÃ³rio,Pioneiro,PORTADOR DE PETRÃ\u0093LEO,"| __truncated__ "7-ARGO-4H-ESS,34281026170,Shell Brasil,7ARGO4HESS,ES,Campos,,ARGO ,ARGONAUTA,M,S,ExplotatÃ³rio,Desenvolvimento,"| __truncated__ "7-ARGO-5H-ESS,34281026180,Shell Brasil,7ARGO5HESS,ES,Campos,,ARGO ,ARGONAUTA,M,S,ExplotatÃ³rio,Desenvolvimento,"| __truncated__ ...

# 1. LIBRERÍAS Y CARGA DE DATOS
library(readxl)
library(dplyr)
library(gt)
library(e1071)

# Load the raw well table from the first sheet of the workbook.
Datos_Brutos <- read_xlsx("C:/Users/LEO/Documents/ESTA/tabela_de_pocos_janeiro_2018.xlsx", sheet = 1)
# Strip stray whitespace around header names so columns can be matched by name.
colnames(Datos_Brutos) <- trimws(colnames(Datos_Brutos))

# any_of() below silently skips absent columns, so a missing analysis column
# would only surface as an obscure "object not found" error inside mutate().
# Fail early with an explicit message instead.
if (!"COTA_ALTIMETRICA_M" %in% colnames(Datos_Brutos)) {
  stop("ERROR: La columna COTA_ALTIMETRICA_M no existe en el archivo.", call. = FALSE)
}

# Keep the well identifier and the analysis column; decimal commas are
# replaced with points before the numeric conversion.
Datos <- Datos_Brutos %>%
  select(any_of(c("POCO", "COTA_ALTIMETRICA_M"))) %>%
  mutate(Variable_Analisis = as.numeric(gsub(",", ".", as.character(COTA_ALTIMETRICA_M))))

# Drop missing values, then keep only values in [0, 5000).
Variable <- na.omit(Datos$Variable_Analisis)
Variable <- Variable[Variable >= 0 & Variable < 5000]

# Nothing can be computed downstream without at least one observation.
if (length(Variable) == 0) {
  stop("ERROR: No hay datos válidos para la variable seleccionada.")
}

# 2. FREQUENCY-TABLE COMPUTATIONS
# Sample size and number of classes: 1 + 3.322 * log10(N), rounded down.
N <- length(Variable)
K <- floor(1 + 3.322 * log10(N))

# K + 1 equally spaced class limits running from the minimum to the maximum.
breaks_table <- seq(min(Variable), max(Variable), length.out = K + 1)

# Absolute frequencies: classes are closed on the left, and the last class
# also takes the maximum (include.lowest = TRUE combined with right = FALSE).
clase <- cut(Variable, breaks = breaks_table, include.lowest = TRUE, right = FALSE)
ni <- tabulate(as.integer(clase), nbins = K)

# Relative frequencies, expressed as percentages.
hi <- prop.table(ni) * 100

# Cumulative frequencies, ascending and descending.
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- sum(ni) - c(0L, head(Ni_asc, -1))
Hi_desc <- rev(cumsum(rev(hi)))

# Frequency distribution table (TDF), one row per class:
#   Li / Ls           lower and upper class limits
#   MC                class mark (midpoint of the two limits)
#   ni / hi           absolute frequency and relative frequency in percent
#   Ni_asc / Ni_desc  ascending / descending cumulative absolute frequency
#   Hi_asc / Hi_desc  ascending / descending cumulative percentage
# The table used to be built twice with identical content; it is built once.
# seq_len() keeps the indexing safe even for a degenerate class count.
TDF_Cota <- data.frame(
  Li = round(breaks_table[seq_len(K)], 2),
  Ls = round(breaks_table[seq_len(K) + 1], 2),
  MC = round((breaks_table[seq_len(K)] + breaks_table[seq_len(K) + 1]) / 2, 2),
  ni = ni,
  hi = round(hi, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc, 2),
  Hi_desc = round(Hi_desc, 2)
)

1 Distribución de Frecuencias

Tabla de distribución de frecuencias para la Cota Altimétrica.

# Render the frequency table with gt: header, source note, a TOTAL row for
# the ni and hi columns, readable column labels and the report colour scheme.
TDF_Cota %>%
  gt() %>%
  tab_header(
    title = md("**DISTRIBUCIÓN DE FRECUENCIAS: COTA ALTIMÉTRICA**"),
    subtitle = md("Variable: **COTA_ALTIMETRICA_M**")
  ) %>%
  tab_source_note(source_note = "Fuente: Datos ANP 2018") %>%
  grand_summary_rows(
    columns = c(ni, hi),
    fns = list(TOTAL = ~sum(.)),
    # `formatter` (and its `...`) is deprecated since gt 0.9.0; the formatting
    # directive is passed through `fmt` instead.
    fmt = ~ fmt_number(., decimals = 0)
  ) %>%
  cols_label(
    Li = "Lím. Inf", Ls = "Lím. Sup", MC = "Marca Clase (Xi)",
    ni = "ni", hi = "hi (%)",
    Ni_asc = "Ni (Asc)", Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi (Asc)", Hi_desc = "Hi (Desc)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  # Dark header band with bold white text for the title and column labels.
  tab_style(
    style = list(cell_fill(color = "#2E4053"), cell_text(color = "white", weight = "bold")),
    locations = list(cells_title(), cells_column_labels())
  ) %>%
  tab_options(
    table.border.top.color = "#2E4053",
    table.border.bottom.color = "#2E4053",
    column_labels.border.bottom.color = "#2E4053",
    data_row.padding = px(6)
  )

## Warning: Since gt v0.9.0, the `formatter` argument (and associated `...`) has been
## deprecated.
## • Please use the `fmt` argument to provide formatting directives.
## This warning is displayed once every 8 hours.

	Lím. Inf	Lím. Sup	Marca Clase (Xi)	ni	hi (%)	Ni (Asc)	Ni (Desc)	Hi (Asc)	Hi (Desc)
DISTRIBUCIÓN DE FRECUENCIAS: COTA ALTIMÉTRICA
Variable: COTA_ALTIMETRICA_M
	0.00	314.07	157.04	10862	99.49	10862	10918	99.49	100.00
	314.07	628.14	471.11	16	0.15	10878	56	99.63	0.51
	628.14	942.21	785.18	29	0.27	10907	40	99.90	0.37
	942.21	1256.29	1099.25	8	0.07	10915	11	99.97	0.10
	1256.29	1570.36	1413.32	1	0.01	10916	3	99.98	0.03
	1570.36	1884.43	1727.39	0	0.00	10916	2	99.98	0.02
	1884.43	2198.50	2041.46	0	0.00	10916	2	99.98	0.02
	2198.50	2512.57	2355.54	0	0.00	10916	2	99.98	0.02
	2512.57	2826.64	2669.61	0	0.00	10916	2	99.98	0.02
	2826.64	3140.71	2983.68	0	0.00	10916	2	99.98	0.02
	3140.71	3454.79	3297.75	0	0.00	10916	2	99.98	0.02
	3454.79	3768.86	3611.82	0	0.00	10916	2	99.98	0.02
	3768.86	4082.93	3925.89	0	0.00	10916	2	99.98	0.02
	4082.93	4397.00	4239.96	2	0.02	10918	2	100.00	0.02
TOTAL	—	—	—	10,918	100	—	—	—	—
Fuente: Datos ANP 2018

2 Análisis Gráfico

# Shared palette for the charts: bar/box fill colour and label colour.
col_gris_azulado <- "#5D6D7E"
col_ejes <- "#2E4053"
# The histogram object that used to be computed here was overwritten before
# its first use, so that dead computation has been removed.

2.1 GRÁFICO 1: Histograma Absoluto

# 50-unit-wide bins starting at 0 and reaching past the largest observation.
breaks_50 <- seq(from = 0, to = max(Variable) + 50, by = 50)
# Histogram object computed without drawing; later charts reuse it.
h_base <- hist(Variable, breaks = breaks_50, plot = FALSE)

# A wide bottom margin leaves room for the rotated x-axis labels.
par(mar = c(8, 5, 4, 2))
plot(
  h_base,
  xlim = c(0, 500),
  # 10% headroom above the tallest bar.
  ylim = c(0, max(h_base$counts) * 1.1),
  axes = FALSE,
  col = col_gris_azulado,
  border = "white",
  main = "Gráfica No.1: Distribución de Cota Altimétrica de Pozos Petroleros de Brasil",
  xlab = "Cota Altimétrica (m)",
  ylab = "Frecuencia Absoluta"
)

# Axes are drawn by hand: ticks every 50 on x, default ticks on y.
axis(side = 1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(side = 2)
# Horizontal guide lines only.
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")

2.2 GRÁFICO 2: Histograma Global

# Same histogram as chart 1, but the y-axis spans the total number of
# observations so each bar is read against the whole sample.
par(mar = c(8, 5, 4, 2))
plot(
  h_base,
  xlim = c(0, 500),
  ylim = c(0, sum(h_base$counts)),
  axes = FALSE,
  col = col_gris_azulado,
  border = "white",
  main = "Gráfica N°2: Distribución de Cota Altimétrica de Pozos Petroleros de Brasil",
  xlab = "Cota Altimétrica (m)",
  ylab = "Total Pozos"
)
# Hand-drawn axes and horizontal guide lines.
axis(side = 1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(side = 2)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")

2.3 GRÁFICO 3: Porcentajes (Local)

# Copy of the base histogram whose counts are replaced by percentages of the
# total, so plotting it with freq = TRUE draws percentage bars.
h_porc <- h_base
h_porc$counts <- prop.table(h_base$counts) * 100

par(mar = c(8, 5, 4, 2))
plot(
  h_porc,
  freq = TRUE,
  xlim = c(0, 500),
  # 20% headroom so the labels above the bars are not clipped.
  ylim = c(0, max(h_porc$counts) * 1.2),
  axes = FALSE,
  col = col_gris_azulado,
  border = "white",
  main = "Gráfica N°3: Distribución Porcentual de Cota Altimétrica de Pozos Petroleros de Brasil",
  xlab = "Cota Altimétrica (m)",
  ylab = "Porcentaje (%)"
)
axis(side = 1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(side = 2)

# Label only the bars whose midpoint lies inside the displayed x-range.
en_rango <- h_base$mids <= 500
porc_visibles <- h_porc$counts[en_rango]
text(
  x = h_base$mids[en_rango],
  y = porc_visibles,
  labels = paste0(round(porc_visibles, 1), "%"),
  pos = 3, cex = 0.6, col = col_ejes
)

2.4 GRÁFICO 4: Global Porcentual

# Percentage histogram drawn against a fixed 0-100 scale.
par(mar = c(8, 5, 4, 2))
plot(
  h_porc,
  freq = TRUE,
  xlim = c(0, 500),
  ylim = c(0, 100),
  axes = FALSE,
  col = col_gris_azulado,
  border = "white",
  main = "Gráfica No.4: Distribución Porcentual de Cota Altimétrica de Pozos Petroleros de Brasil",
  xlab = "Cota Altimétrica (m)",
  ylab = "% del Total"
)
axis(side = 1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(side = 2)

# Percentage labels for the bars whose midpoint is inside the x-range shown.
barras_visibles <- which(h_base$mids <= 500)
text(
  x = h_base$mids[barras_visibles],
  y = h_porc$counts[barras_visibles],
  labels = paste0(round(h_porc$counts[barras_visibles], 1), "%"),
  pos = 3, cex = 0.6, col = col_ejes
)

3 Diagrama de Caja y Ojivas

3.1 GRÁFICO 5: Boxplot

# Horizontal box plot of the variable, with the value axis limited to 0-500
# and outliers drawn as solid red points.
par(mar = c(8, 5, 4, 2))
boxplot(
  Variable,
  horizontal = TRUE,
  xlim = c(0.7, 1.3),
  ylim = c(0, 500),
  axes = FALSE,
  col = col_gris_azulado,
  outline = TRUE,
  outpch = 19,
  outcol = "#C0392B",
  main = "Gráfica No.5: Diagrama de Caja (Cota Altimétrica)",
  xlab = "Cota Altimétrica (m)"
)

# Only the value axis is drawn, then a frame around the plot region.
axis(side = 1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
box()

3.2 GRÁFICO 6: Ojivas

# Ascending and descending ogives plotted over the class limits.
# The wide right margin reserves space for the legend outside the plot area.
# xpd is no longer set globally: with par(xpd = TRUE) the grid lines spilled
# outside the plot region and the setting leaked into every later plot.
par(mar = c(5, 5, 4, 8))
x_vals <- breaks_table
# Ascending curve starts at 0 on the first limit; axes are drawn by hand
# below. `FALSE` is spelled out because `F` can be reassigned.
plot(x_vals, c(0, Ni_asc), type = "o", col = "#2E4053", lwd = 2, pch = 19, axes = FALSE,
     main = "Gráfica No.6: Ojivas Ascendente y Descendente",
     xlab = "Cota Altimétrica (m)", ylab = "Frecuencia acumulada")
# Descending curve ends at 0 on the last limit.
lines(x_vals, c(Ni_desc, 0), type = "o", col = "#C0392B", lwd = 2, pch = 19)
axis(1, at = round(breaks_table, 0), las = 2, cex.axis = 0.6)
axis(2)
grid()
# Only the legend may draw outside the plot region (negative inset).
legend("right", legend = c("Asc", "Desc"), col = c("#2E4053", "#C0392B"), lty = 1, pch = 19,
       inset = c(-0.15, 0), bty = "n", xpd = TRUE)

4 Resumen Estadístico

# Central tendency and dispersion of the analysed variable.
media_val <- mean(Variable)
mediana_val <- median(Variable)
sd_val <- sd(Variable)

# Outliers as flagged by boxplot.stats(): reported as "count [min; max]",
# or a fixed message when there are none.
valores_atipicos <- boxplot.stats(Variable)$out
if (length(valores_atipicos) > 0) {
  rango_atipicos <- round(range(valores_atipicos), 2)
  status_atipicos <- paste0(
    length(valores_atipicos), " [", rango_atipicos[1], "; ", rango_atipicos[2], "]"
  )
} else {
  status_atipicos <- "0 (Sin atípicos)"
}

# Mode taken as the class mark(s) of the most frequent class(es) of the
# frequency table.
marcas_modales <- with(TDF_Cota, MC[ni == max(ni)])
rango_datos <- round(range(Variable), 2)

# One-row summary table with all the descriptive statistics.
df_resumen <- data.frame(
  Variable = "Cota Altimétrica (m)",
  Rango = paste0("[", rango_datos[1], "; ", rango_datos[2], "]"),
  Media = media_val,
  Mediana = mediana_val,
  Moda = paste(round(marcas_modales, 2), collapse = ", "),
  Varianza = var(Variable),
  Desv_Std = sd_val,
  # Coefficient of variation, in percent.
  CV_Porc = (sd_val / abs(media_val)) * 100,
  Asimetria = skewness(Variable, type = 2),
  Curtosis = kurtosis(Variable, type = 2),
  Atipicos = status_atipicos
)

# Render the one-row summary with gt, built step by step.
tabla_resumen <- gt(df_resumen)
tabla_resumen <- tab_header(
  tabla_resumen,
  title = md("**CONCLUSIONES Y ESTADÍSTICOS**"),
  subtitle = "Variable: COTA_ALTIMETRICA_M"
)
# Two decimals for most statistics, four for the skewness.
tabla_resumen <- fmt_number(
  tabla_resumen,
  columns = c(Media, Mediana, Varianza, Desv_Std, CV_Porc, Curtosis),
  decimals = 2
)
tabla_resumen <- fmt_number(tabla_resumen, columns = Asimetria, decimals = 4)
# Dark header band with bold white column labels.
tabla_resumen <- tab_options(tabla_resumen, column_labels.background.color = "#2E4053")
tabla_resumen <- tab_style(
  tabla_resumen,
  style = list(cell_text(weight = "bold", color = "white")),
  locations = cells_column_labels()
)
# Print the finished table.
tabla_resumen

Variable	Rango	Media	Mediana	Moda	Varianza	Desv_Std	CV_Porc	Asimetria	Curtosis	Atipicos
CONCLUSIONES Y ESTADÍSTICOS
Variable: COTA_ALTIMETRICA_M
Cota Altimétrica (m)	[0; 4397]	36.51	12.29	157.04	8,200.43	90.56	248.03	23.3593	976.74	443 [132.08; 4397]

5 Conclusiones

# Automatic narrative: derive a few labels from the data, then print one
# Markdown paragraph describing the variable.
asimetria_val <- skewness(Variable, type = 2)
cv_calc <- (sd(Variable) / abs(mean(Variable))) * 100

# Extremes rendered without scientific notation.
min_txt <- format(min(Variable), scientific = FALSE)
max_txt <- format(max(Variable), scientific = FALSE)

# Centre of the distribution: the median when |skewness| exceeds 0.5,
# otherwise the mean.
if (abs(asimetria_val) > 0.5) {
  centro <- median(Variable)
} else {
  centro <- mean(Variable)
}
centro_valor <- format(round(centro, 2), scientific = FALSE)

# A coefficient of variation above 30% is labelled heterogeneous.
if (cv_calc > 30) {
  tipo_homogeneidad <- "heterogénea"
} else {
  tipo_homogeneidad <- "homogénea"
}

# Positive skewness is described as concentration in the lower-middle part.
if (asimetria_val > 0) {
  donde_se_concentra <- "parte media baja"
} else {
  donde_se_concentra <- "parte media alta"
}

# Operational verdict based on whether the median is below 500.
if (median(Variable) < 500) {
  juicio_logistico <- "favorable para la infraestructura"
} else {
  juicio_logistico <- "desafiante para la logística"
}

# Assemble the paragraph and emit it as raw Markdown.
texto_conclusion <- paste0(
  "## Análisis Descriptivo y Logístico\n\n",
  "La variable **Cota Altimétrica** fluctúa entre **", min_txt, "** y **", max_txt, "** metros, ",
  "con un centro de distribución en **", centro_valor, "** metros. ",
  "La muestra se comporta como una variable **", tipo_homogeneidad, "** (CV: ", round(cv_calc, 2), "%), ",
  "concentrándose mayoritariamente en la **", donde_se_concentra, "** de la distribución. ",
  "Desde una perspectiva operativa, el comportamiento se considera **", juicio_logistico, "** para la preparación del terreno."
)
cat(texto_conclusion)

5.1 Análisis Descriptivo y Logístico

La variable Cota Altimétrica fluctúa entre 0 y 4397 metros, con un centro de distribución en 12.3 metros. La muestra se comporta como una variable heterogénea (CV: 248.03%), concentrándose mayoritariamente en la parte media baja de la distribución. Desde una perspectiva operativa, el comportamiento se considera favorable para la infraestructura para la preparación del terreno.

Cota Altimétrica

Leonardo Ruiz

2026-03-08