knitr::opts_chunk$set(echo = TRUE)
setwd("C:/Users/LEO/Documents/ESTA")
Datos <- read.csv("tabela_de_pocos_janeiro_2018.csv", header = TRUE, sep = ";" , dec = ".", fileEncoding = "Latin1")
str(Datos)
## 'data.frame': 29575 obs. of 1 variable:
## $ ï..POCO.CADASTRO.OPERADOR.POCO_OPERADOR.ESTADO.BACIA.BLOCO.SIG_CAMPO.CAMPO.TERRA_MAR.POCO_POS_ANP.TIPO.CATEGORIA.RECLASSIFICACAO.SITUACAO.INICIO.TERMINO.CONCLUSAO.TITULARIDADE.LATITUDE_BASE_4C.LONGITUDE_BASE_4C.LATITUDE_BASE_DD.LONGITUDE_BASE_DD.DATUM_HORIZONTAL.TIPO_DE_COORDENADA_DE_BASE.DIRECAO.PROFUNDIDADE_VERTICAL_M.PROFUNDIDADE_SONDADOR_M.PROFUNDIDADE_MEDIDA_M.REFERENCIA_DE_PROFUNDIDADE.MESA_ROTATIVA.COTA_ALTIMETRICA_M.LAMINA_D_AGUA_M.DATUM_VERTICAL.UNIDADE_ESTRATIGRAFICA.GEOLOGIA_GRUPO_FINAL.GEOLOGIA_FORMACAO_FINAL.GEOLOGIA_MEMBRO_FINAL.CDPE.AGP.PC.PAG.PERFIS_CONVENCIONAIS.DURANTE_PERFURACAO.PERFIS_DIGITAIS.PERFIS_PROCESSADOS.PERFIS_ESPECIAIS.AMOSTRA_LATERAL.SISMICA.TABELA_TEMPO_PROFUNDIDADE.DADOS_DIRECIONAIS.TESTE_A_CABO.TESTE_DE_FORMACAO.CANHONEIO.TESTEMUNHO.GEOQUIMICA.SIG_SONDA.NOM_SONDA.DHA_ATUALIZACAO: chr "7-RO-123HP-RJS,74281026087,Petrobras,7RO123HPRJS,RJ,Campos,,RO ,RONCADOR,M,S,Explotatório,Desenvolvimento,PR"| __truncated__ "1-BP-7-RJS,74281026107,BP Energy,ANU,RJ,Campos,C-M-473,,,M,S,Exploratório,Pioneiro,PORTADOR DE PETRÃ\u0093LEO,"| __truncated__ "7-ARGO-4H-ESS,34281026170,Shell Brasil,7ARGO4HESS,ES,Campos,,ARGO ,ARGONAUTA,M,S,Explotatório,Desenvolvimento,"| __truncated__ "7-ARGO-5H-ESS,34281026180,Shell Brasil,7ARGO5HESS,ES,Campos,,ARGO ,ARGONAUTA,M,S,Explotatório,Desenvolvimento,"| __truncated__ ...
library(readxl)
library(dplyr)
library(gt)
library(e1071)
# Carga de datos
Datos_Brutos <- read_xlsx("C:/Users/LEO/Documents/ESTA/tabela_de_pocos_janeiro_2018.xlsx", sheet = 1)
colnames(Datos_Brutos) <- trimws(colnames(Datos_Brutos))
# Cambio de variable a LAMINA_D_AGUA_M
Datos <- Datos_Brutos %>%
select(any_of(c("POCO", "LAMINA_D_AGUA_M"))) %>%
mutate(Variable_Analisis = as.numeric(gsub(",", ".", as.character(LAMINA_D_AGUA_M))))
Variable <- na.omit(Datos$Variable_Analisis)
# Filtro para profundidades razonables (0 a 5000 metros)
Variable <- Variable[Variable >= 0 & Variable < 5000]
if(length(Variable) == 0) {
stop("ERROR: No hay datos válidos para la variable seleccionada.")
}
# 2. CÁLCULOS MATEMÁTICOS PARA LA TABLA
N <- length(Variable)
K <- floor(1 + 3.322 * log10(N))
breaks_table <- seq(min(Variable), max(Variable), length.out = K + 1)
# Cálculo de ni usando cut
ni <- as.vector(table(cut(Variable, breaks = breaks_table, include.lowest = TRUE, right = FALSE)))
# Cálculo de vectores estadísticos
hi <- (ni / sum(ni)) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))
# Creación de la Tabla de Distribución de Frecuencias (TDF)
TDF_Lamina <- data.frame(
Li = round(breaks_table[1:K], 2),
Ls = round(breaks_table[2:(K+1)], 2),
MC = round((breaks_table[1:K] + breaks_table[2:(K+1)]) / 2, 2),
ni = ni,
hi = round(hi, 2),
Ni_asc = Ni_asc,
Ni_desc = Ni_desc,
Hi_asc = round(Hi_asc, 2),
Hi_desc = round(Hi_desc, 2)
)
Tabla de distribución de frecuencias para la Cota Altimétrica.
TDF_Lamina %>%
gt() %>%
tab_header(
title = md("**DISTRIBUCIÓN DE FRECUENCIAS: LÁMINA DE AGUA**"),
subtitle = md("Variable: **LAMINA_D_AGUA_M**")
) %>%
tab_source_note(source_note = "Fuente: Datos ANP 2018") %>%
grand_summary_rows(
columns = c(ni, hi),
fns = list(TOTAL = ~sum(.)),
formatter = fmt_number, decimals = 0
) %>%
cols_label(
Li = "Lím. Inf", Ls = "Lím. Sup", MC = "Marca Clase (Xi)",
ni = "ni", hi = "hi (%)",
Ni_asc = "Ni (Asc)", Ni_desc = "Ni (Desc)",
Hi_asc = "Hi (Asc)", Hi_desc = "Hi (Desc)"
) %>%
cols_align(align = "center", columns = everything()) %>%
tab_style(
style = list(cell_fill(color = "#2E4053"), cell_text(color = "white", weight = "bold")),
locations = list(cells_title(), cells_column_labels())
) %>%
tab_options(
table.border.top.color = "#2E4053",
table.border.bottom.color = "#2E4053",
column_labels.border.bottom.color = "#2E4053",
data_row.padding = px(6)
)
## Warning: Since gt v0.9.0, the `formatter` argument (and associated `...`) has been
## deprecated.
## • Please use the `fmt` argument to provide formatting directives.
## This warning is displayed once every 8 hours.
| DISTRIBUCIÓN DE FRECUENCIAS: LÁMINA DE AGUA | |||||||||
| Variable: LAMINA_D_AGUA_M | |||||||||
| Lím. Inf | Lím. Sup | Marca Clase (Xi) | ni | hi (%) | Ni (Asc) | Ni (Desc) | Hi (Asc) | Hi (Desc) | |
|---|---|---|---|---|---|---|---|---|---|
| 0.00 | 213.43 | 106.71 | 10682 | 79.33 | 10682 | 13466 | 79.33 | 100.00 | |
| 213.43 | 426.86 | 320.14 | 263 | 1.95 | 10945 | 2784 | 81.28 | 20.67 | |
| 426.86 | 640.29 | 533.57 | 233 | 1.73 | 11178 | 2521 | 83.01 | 18.72 | |
| 640.29 | 853.71 | 747.00 | 275 | 2.04 | 11453 | 2288 | 85.05 | 16.99 | |
| 853.71 | 1067.14 | 960.43 | 416 | 3.09 | 11869 | 2013 | 88.14 | 14.95 | |
| 1067.14 | 1280.57 | 1173.86 | 377 | 2.80 | 12246 | 1597 | 90.94 | 11.86 | |
| 1280.57 | 1494.00 | 1387.29 | 380 | 2.82 | 12626 | 1220 | 93.76 | 9.06 | |
| 1494.00 | 1707.43 | 1600.71 | 239 | 1.77 | 12865 | 840 | 95.54 | 6.24 | |
| 1707.43 | 1920.86 | 1814.14 | 241 | 1.79 | 13106 | 601 | 97.33 | 4.46 | |
| 1920.86 | 2134.29 | 2027.57 | 133 | 0.99 | 13239 | 360 | 98.31 | 2.67 | |
| 2134.29 | 2347.71 | 2241.00 | 160 | 1.19 | 13399 | 227 | 99.50 | 1.69 | |
| 2347.71 | 2561.14 | 2454.43 | 28 | 0.21 | 13427 | 67 | 99.71 | 0.50 | |
| 2561.14 | 2774.57 | 2667.86 | 22 | 0.16 | 13449 | 39 | 99.87 | 0.29 | |
| 2774.57 | 2988.00 | 2881.29 | 17 | 0.13 | 13466 | 17 | 100.00 | 0.13 | |
| TOTAL | — | — | — | 13,466 | 100 | — | — | — | — |
| Fuente: Datos ANP 2018 | |||||||||
col_gris_azulado <- "#5D6D7E"
col_ejes <- "#2E4053"
breaks_agrupados <- seq(0, 1500, by = 100)
h_base <- hist(Variable[Variable <= 1500], breaks = breaks_agrupados, plot = FALSE)
par(mar = c(8, 5, 4, 2))
plot(h_base,
main = "Gráfica No.1: Distribución de Lámina de Agua de Pozos Petroleros de Brasil",
xlab = "Lámina de Agua (m)", ylab = "Frecuencia Absoluta",
col = col_gris_azulado, border = "white", axes = FALSE,
ylim = c(0, max(h_base$counts) * 1.1))
# Eje X con saltos de 100 para que se vea la agrupación detallada
axis(1, at = seq(0, 1500, by = 100), las = 2, cex.axis = 0.7)
axis(2)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")
par(mar = c(8, 5, 4, 2))
plot(h_base,
main = "Gráfica N°2: Distribución de Lámina de Agua de Pozos Petroleros de Brasil",
xlab = "Lámina de Agua (m)", ylab = "Total Pozos",
col = col_gris_azulado, border = "white", axes = FALSE,
ylim = c(0, sum(h_base$counts)))
axis(1, at = seq(0, 1500, by = 100), las = 2, cex.axis = 0.7)
axis(2)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")
h_porc <- h_base
h_porc$counts <- (h_porc$counts / sum(h_porc$counts)) * 100
par(mar = c(8, 5, 4, 2))
plot(h_porc,
main = "Gráfica N°3: Distribución Porcentual (Zona de Mayor Concentración)",
xlab = "Lámina de Agua (m)", ylab = "Porcentaje (%)",
col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
ylim = c(0, max(h_porc$counts)*1.2))
axis(1, at = seq(0, 1500, by = 100), las = 2, cex.axis = 0.7)
axis(2)
text(x = h_base$mids[h_porc$counts > 0.5],
y = h_porc$counts[h_porc$counts > 0.5],
label = paste0(round(h_porc$counts[h_porc$counts > 0.5], 1), "%"),
pos = 3, cex = 0.6, col = col_ejes)
par(mar = c(8, 5, 4, 2))
plot(h_porc,
main = "Gráfica No.4: Distribución Porcentual de Lámina de Agua de Pozos Petroleros de Brasil",
xlab = "Lámina de Agua (m)", ylab = "% del Total",
col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
ylim = c(0, 100),
xlim = c(0, 3000))
axis(1, at = seq(0, 3000, by = 250), las = 2, cex.axis = 0.7)
axis(2)
text(x = h_base$mids[h_base$mids <= 3000],
y = h_porc$counts[h_base$mids <= 3000],
label = paste0(round(h_porc$counts[h_base$mids <= 3000], 1), "%"),
pos = 3, cex = 0.6, col = col_ejes)
par(mar = c(8, 5, 4, 2))
boxplot(Variable, horizontal = TRUE, col = col_gris_azulado,
main = "Gráfica No.5: Diagrama de Caja (Lámina de Agua)",
xlab = "Lámina de Agua (m)", outline = TRUE, outpch = 19,
outcol = "#C0392B", axes = FALSE, ylim = c(0, 1500))
axis(1, at = seq(0, 1500, by = 100), las = 2, cex.axis = 0.7)
box()
par(mar = c(5, 5, 4, 8), xpd = TRUE)
x_vals_ojiva <- seq(0, max(Variable) + 250, by = 250)
ni_ojiva <- as.vector(table(cut(Variable, breaks = x_vals_ojiva, include.lowest = TRUE, right = FALSE)))
Ni_asc_ojiva <- cumsum(ni_ojiva)
Ni_desc_ojiva <- rev(cumsum(rev(ni_ojiva)))
plot(x_vals_ojiva, c(0, Ni_asc_ojiva), type = "o", col = "#2E4053", lwd=2, pch=19, axes=F,
main = "Gráfica No.6: Ojivas Ascendente y Descendente",
xlab = "Lámina de Agua (m)", ylab = "Frecuencia acumulada",
xlim = c(0, 3000))
lines(x_vals_ojiva, c(Ni_desc_ojiva, 0), type = "o", col = "#C0392B", lwd=2, pch=19)
axis(1, at = seq(0, 3000, by = 250), las = 2, cex.axis = 0.6)
axis(2)
legend("right", legend = c("Asc", "Desc"), col = c("#2E4053", "#C0392B"), lty = 1, pch = 19, inset = c(-0.15, 0), bty="n")
grid()
media_val <- mean(Variable)
mediana_val <- median(Variable)
sd_val <- sd(Variable)
status_atipicos <- if(length(boxplot.stats(Variable)$out) > 0) {
paste0(length(boxplot.stats(Variable)$out), " [", round(min(boxplot.stats(Variable)$out), 2), "; ", round(max(boxplot.stats(Variable)$out), 2), "]")
} else { "0 (Sin atípicos)" }
df_resumen <- data.frame(
Variable = "Lámina de Agua (m)",
Rango = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
Media = media_val,
Mediana = mediana_val,
Moda = paste(round(TDF_Lamina$MC[TDF_Lamina$ni == max(TDF_Lamina$ni)], 2), collapse = ", "),
Varianza = var(Variable),
Desv_Std = sd_val,
CV_Porc = (sd_val / abs(media_val)) * 100,
Asimetria = skewness(Variable, type = 2),
Curtosis = kurtosis(Variable, type = 2),
Atipicos = status_atipicos
)
df_resumen %>%
gt() %>%
tab_header(title = md("**CONCLUSIONES Y ESTADÍSTICOS**"), subtitle = "Variable: LAMINA_D_AGUA_M") %>%
fmt_number(columns = c(Media, Mediana, Varianza, Desv_Std, CV_Porc, Curtosis), decimals = 2) %>%
fmt_number(columns = Asimetria, decimals = 4) %>%
tab_options(column_labels.background.color = "#2E4053") %>%
tab_style(style = list(cell_text(weight = "bold", color = "white")), locations = cells_column_labels())
| CONCLUSIONES Y ESTADÍSTICOS | ||||||||||
| Variable: LAMINA_D_AGUA_M | ||||||||||
| Variable | Rango | Media | Mediana | Moda | Varianza | Desv_Std | CV_Porc | Asimetria | Curtosis | Atipicos |
|---|---|---|---|---|---|---|---|---|---|---|
| Lámina de Agua (m) | [0; 2988] | 268.13 | 0.00 | 106.71 | 307,109.51 | 554.17 | 206.68 | 2.2107 | 3.97 | 2638 [296; 2988] |
min_txt <- format(min(Variable), scientific = FALSE)
max_txt <- format(max(Variable), scientific = FALSE)
asimetria_val <- skewness(Variable, type = 2)
centro_valor <- format(round(if(abs(asimetria_val) > 0.5) median(Variable) else mean(Variable), 2), scientific = FALSE)
cv_calc <- (sd(Variable) / abs(mean(Variable))) * 100
tipo_homogeneidad <- if(cv_calc > 30) "heterogénea" else "homogénea"
donde_se_concentra <- if(asimetria_val > 0) "aguas someras" else "aguas profundas"
juicio_operativo <- if(median(Variable) < 1000) "operación convencional" else "operación de aguas ultra-profundas"
cat(paste0(
"## Análisis de Lámina de Agua\n\n",
"La variable **Lámina de Agua** presenta un rango de **", min_txt, "** a **", max_txt, "** metros. ",
"El valor central se sitúa en **", centro_valor, "** metros. ",
"La distribución se caracteriza por ser **", tipo_homogeneidad, "** (CV: ", round(cv_calc, 2), "%), ",
"con una concentración de datos orientada hacia **", donde_se_concentra, "**. ",
"Este comportamiento sugiere una logística de **", juicio_operativo, "** para la mayoría de los pozos analizados."
))
La variable Lámina de Agua presenta un rango de 0 a 2988 metros. El valor central se sitúa en 0 metros. La distribución se caracteriza por ser heterogénea (CV: 206.68%), con una concentración de datos orientada hacia aguas someras. Este comportamiento sugiere una logística de operación convencional para la mayoría de los pozos analizados.