knitr::opts_chunk$set(echo = TRUE)
setwd("C:/Users/LEO/Documents/ESTA")
Datos <- read.csv("tabela_de_pocos_janeiro_2018.csv", header = TRUE, sep = ";" , dec = ".", fileEncoding = "Latin1")
str(Datos)
## 'data.frame': 29575 obs. of 1 variable:
## $ ï..POCO.CADASTRO.OPERADOR.POCO_OPERADOR.ESTADO.BACIA.BLOCO.SIG_CAMPO.CAMPO.TERRA_MAR.POCO_POS_ANP.TIPO.CATEGORIA.RECLASSIFICACAO.SITUACAO.INICIO.TERMINO.CONCLUSAO.TITULARIDADE.LATITUDE_BASE_4C.LONGITUDE_BASE_4C.LATITUDE_BASE_DD.LONGITUDE_BASE_DD.DATUM_HORIZONTAL.TIPO_DE_COORDENADA_DE_BASE.DIRECAO.PROFUNDIDADE_VERTICAL_M.PROFUNDIDADE_SONDADOR_M.PROFUNDIDADE_MEDIDA_M.REFERENCIA_DE_PROFUNDIDADE.MESA_ROTATIVA.COTA_ALTIMETRICA_M.LAMINA_D_AGUA_M.DATUM_VERTICAL.UNIDADE_ESTRATIGRAFICA.GEOLOGIA_GRUPO_FINAL.GEOLOGIA_FORMACAO_FINAL.GEOLOGIA_MEMBRO_FINAL.CDPE.AGP.PC.PAG.PERFIS_CONVENCIONAIS.DURANTE_PERFURACAO.PERFIS_DIGITAIS.PERFIS_PROCESSADOS.PERFIS_ESPECIAIS.AMOSTRA_LATERAL.SISMICA.TABELA_TEMPO_PROFUNDIDADE.DADOS_DIRECIONAIS.TESTE_A_CABO.TESTE_DE_FORMACAO.CANHONEIO.TESTEMUNHO.GEOQUIMICA.SIG_SONDA.NOM_SONDA.DHA_ATUALIZACAO: chr "7-RO-123HP-RJS,74281026087,Petrobras,7RO123HPRJS,RJ,Campos,,RO ,RONCADOR,M,S,Explotatório,Desenvolvimento,PR"| __truncated__ "1-BP-7-RJS,74281026107,BP Energy,ANU,RJ,Campos,C-M-473,,,M,S,Exploratório,Pioneiro,PORTADOR DE PETRÃ\u0093LEO,"| __truncated__ "7-ARGO-4H-ESS,34281026170,Shell Brasil,7ARGO4HESS,ES,Campos,,ARGO ,ARGONAUTA,M,S,Explotatório,Desenvolvimento,"| __truncated__ "7-ARGO-5H-ESS,34281026180,Shell Brasil,7ARGO5HESS,ES,Campos,,ARGO ,ARGONAUTA,M,S,Explotatório,Desenvolvimento,"| __truncated__ ...
library(readxl)
library(dplyr)
library(gt)
library(e1071)
# Carga de datos
Datos_Brutos <- read_xlsx("C:/Users/LEO/Documents/ESTA/tabela_de_pocos_janeiro_2018.xlsx", sheet = 1)
colnames(Datos_Brutos) <- trimws(colnames(Datos_Brutos))
# Cambio de variable a MESA_ROTATIVA
Datos <- Datos_Brutos %>%
select(any_of(c("POCO", "MESA_ROTATIVA"))) %>%
mutate(Variable_Analisis = as.numeric(gsub(",", ".", as.character(MESA_ROTATIVA))))
Variable <- na.omit(Datos$Variable_Analisis)
# Filtro para valores razonables (0 a 1000 metros)
Variable <- Variable[Variable >= 0 & Variable < 1000]
if(length(Variable) == 0) {
stop("ERROR: No hay datos válidos para la variable seleccionada.")
}
# 2. CÁLCULOS MATEMÁTICOS PARA LA TABLA
N <- length(Variable)
K <- floor(1 + 3.322 * log10(N))
breaks_table <- seq(min(Variable), max(Variable), length.out = K + 1)
# Cálculo de ni usando cut
ni <- as.vector(table(cut(Variable, breaks = breaks_table, include.lowest = TRUE, right = FALSE)))
# Cálculo de vectores estadísticos
hi <- (ni / sum(ni)) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))
# Creación de la Tabla de Distribución de Frecuencias (TDF)
TDF_Mesa <- data.frame(
Li = round(breaks_table[1:K], 2),
Ls = round(breaks_table[2:(K+1)], 2),
MC = round((breaks_table[1:K] + breaks_table[2:(K+1)]) / 2, 2),
ni = ni,
hi = round(hi, 2),
Ni_asc = Ni_asc,
Ni_desc = Ni_desc,
Hi_asc = round(Hi_asc, 2),
Hi_desc = round(Hi_desc, 2)
)
Tabla de distribución de frecuencias para la Cota Altimétrica.
TDF_Mesa %>%
gt() %>%
tab_header(
title = md("**DISTRIBUCIÓN DE FRECUENCIAS: MESA ROTATIVA**"),
subtitle = md("Variable: **MESA_ROTATIVA**")
) %>%
tab_source_note(source_note = "Fuente: Datos ANP 2018") %>%
grand_summary_rows(
columns = c(ni, hi),
fns = list(TOTAL = ~sum(.)),
formatter = fmt_number, decimals = 0
) %>%
cols_label(
Li = "Lím. Inf", Ls = "Lím. Sup", MC = "Marca Clase (Xi)",
ni = "ni", hi = "hi (%)",
Ni_asc = "Ni (Asc)", Ni_desc = "Ni (Desc)",
Hi_asc = "Hi (Asc)", Hi_desc = "Hi (Desc)"
) %>%
cols_align(align = "center", columns = everything()) %>%
tab_style(
style = list(cell_fill(color = "#2E4053"), cell_text(color = "white", weight = "bold")),
locations = list(cells_title(), cells_column_labels())
) %>%
tab_options(
table.border.top.color = "#2E4053",
table.border.bottom.color = "#2E4053",
column_labels.border.bottom.color = "#2E4053",
data_row.padding = px(6)
)
## Warning: Since gt v0.9.0, the `formatter` argument (and associated `...`) has been
## deprecated.
## • Please use the `fmt` argument to provide formatting directives.
## This warning is displayed once every 8 hours.
| DISTRIBUCIÓN DE FRECUENCIAS: MESA ROTATIVA | |||||||||
| Variable: MESA_ROTATIVA | |||||||||
| Lím. Inf | Lím. Sup | Marca Clase (Xi) | ni | hi (%) | Ni (Asc) | Ni (Desc) | Hi (Asc) | Hi (Desc) | |
|---|---|---|---|---|---|---|---|---|---|
| 0.00 | 66.27 | 33.13 | 20359 | 70.60 | 20359 | 28838 | 70.60 | 100.00 | |
| 66.27 | 132.53 | 99.40 | 6242 | 21.65 | 26601 | 8479 | 92.24 | 29.40 | |
| 132.53 | 198.80 | 165.67 | 1531 | 5.31 | 28132 | 2237 | 97.55 | 7.76 | |
| 198.80 | 265.07 | 231.93 | 403 | 1.40 | 28535 | 706 | 98.95 | 2.45 | |
| 265.07 | 331.33 | 298.20 | 65 | 0.23 | 28600 | 303 | 99.17 | 1.05 | |
| 331.33 | 397.60 | 364.47 | 69 | 0.24 | 28669 | 238 | 99.41 | 0.83 | |
| 397.60 | 463.87 | 430.73 | 22 | 0.08 | 28691 | 169 | 99.49 | 0.59 | |
| 463.87 | 530.13 | 497.00 | 19 | 0.07 | 28710 | 147 | 99.56 | 0.51 | |
| 530.13 | 596.40 | 563.27 | 25 | 0.09 | 28735 | 128 | 99.64 | 0.44 | |
| 596.40 | 662.67 | 629.53 | 21 | 0.07 | 28756 | 103 | 99.72 | 0.36 | |
| 662.67 | 728.93 | 695.80 | 19 | 0.07 | 28775 | 82 | 99.78 | 0.28 | |
| 728.93 | 795.20 | 762.07 | 15 | 0.05 | 28790 | 63 | 99.83 | 0.22 | |
| 795.20 | 861.47 | 828.33 | 27 | 0.09 | 28817 | 48 | 99.93 | 0.17 | |
| 861.47 | 927.73 | 894.60 | 14 | 0.05 | 28831 | 21 | 99.98 | 0.07 | |
| 927.73 | 994.00 | 960.87 | 7 | 0.02 | 28838 | 7 | 100.00 | 0.02 | |
| TOTAL | — | — | — | 28,838 | 100 | — | — | — | — |
| Fuente: Datos ANP 2018 | |||||||||
col_gris_azulado <- "#5D6D7E"
col_ejes <- "#2E4053"
breaks_50 <- seq(0, max(Variable) + 50, by = 50)
h_base <- hist(Variable, breaks = breaks_50, plot = FALSE)
par(mar = c(8, 5, 4, 2))
plot(h_base,
main = "Gráfica No.1: Distribución de Mesa Rotativa de Pozos Petroleros de Brasil",
xlab = "Mesa Rotativa (m)", ylab = "Frecuencia Absoluta",
col = col_gris_azulado, border = "white", axes = FALSE,
ylim = c(0, max(h_base$counts) * 1.1),
xlim = c(0, 500))
axis(1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(2)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")
par(mar = c(8, 5, 4, 2))
plot(h_base,
main = "Gráfica N°2: Distribución de Mesa Rotativa de Pozos Petroleros de Brasil",
xlab = "Mesa Rotativa (m)", ylab = "Total Pozos",
col = col_gris_azulado, border = "white", axes = FALSE,
ylim = c(0, sum(h_base$counts)),
xlim = c(0, 500))
axis(1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(2)
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")
h_porc <- h_base
h_porc$counts <- (h_porc$counts / sum(h_porc$counts)) * 100
par(mar = c(8, 5, 4, 2))
plot(h_porc,
main = "Gráfica N°3: Distribución Porcentual de Mesa Rotativa de Pozos Petroleros de Brasil",
xlab = "Mesa Rotativa (m)", ylab = "Porcentaje (%)",
col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
ylim = c(0, max(h_porc$counts)*1.2),
xlim = c(0, 500))
axis(1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(2)
text(x = h_base$mids[h_base$mids <= 500],
y = h_porc$counts[h_base$mids <= 500],
label = paste0(round(h_porc$counts[h_base$mids <= 500], 1), "%"),
pos = 3, cex = 0.6, col = col_ejes)
par(mar = c(8, 5, 4, 2))
plot(h_porc,
main = "Gráfica No.4: Distribución Porcentual de Mesa Rotativa de Pozos Petroleros de Brasil",
xlab = "Mesa Rotativa (m)", ylab = "% del Total",
col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
ylim = c(0, 100),
xlim = c(0, 500))
axis(1, at = seq(0, 500, by = 50), las = 2, cex.axis = 0.7)
axis(2)
text(x = h_base$mids[h_base$mids <= 500],
y = h_porc$counts[h_base$mids <= 500],
label = paste0(round(h_porc$counts[h_base$mids <= 500], 1), "%"),
pos = 3, cex = 0.6, col = col_ejes)
col_gris_azulado <- "#5D6D7E"
col_acento <- "#C0392B"
par(mar = c(8, 5, 4, 2))
boxplot(Variable, horizontal = TRUE, col = col_gris_azulado,
main = "Gráfica No.5: Diagrama de Caja (Mesa Rotativa)",
xlab = "Mesa Rotativa (m)", outline = TRUE, outpch = 19,
outcol = col_acento, axes = FALSE, xlim = c(0.7, 1.3),
ylim = c(0, 200))
axis(1, at = seq(0, 200, by = 25), las = 2, cex.axis = 0.7)
box()
col_azul_oscuro <- "#2E4053"
col_rojo_fuerte <- "#C0392B"
par(mar = c(8, 5, 4, 8), xpd = TRUE)
x_vals_ojiva <- breaks_table
plot(x_vals_ojiva, c(0, Ni_asc), type = "o", col = col_azul_oscuro,
lwd=2, pch=19, axes=F,
main = "Gráfica No.6: Ojivas Ascendente y Descendente (Mesa Rotativa)",
xlab = "Mesa Rotativa (m)", ylab = "Frecuencia acumulada")
lines(x_vals_ojiva, c(Ni_desc, 0), type = "o", col = col_rojo_fuerte,
lwd=2, pch=19)
axis(1, at = seq(0, max(breaks_table), by = 50), las = 2, cex.axis = 0.6)
axis(2)
legend("right", legend = c("Ascendente", "Descendente"),
col = c(col_azul_oscuro, col_rojo_fuerte),
lty = 1, pch = 19, cex = 0.7, lwd=2,
inset = c(-0.15, 0), bty="n")
grid()
media_val <- mean(Variable)
mediana_val <- median(Variable)
sd_val <- sd(Variable)
status_atipicos <- if(length(boxplot.stats(Variable)$out) > 0) {
paste0(length(boxplot.stats(Variable)$out), " [", round(min(boxplot.stats(Variable)$out), 2), "; ", round(max(boxplot.stats(Variable)$out), 2), "]")
} else { "0 (Sin atípicos)" }
df_resumen <- data.frame(
Variable = "Mesa Rotativa (m)",
Rango = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
Media = media_val,
Mediana = mediana_val,
Moda = paste(round(TDF_Mesa$MC[TDF_Mesa$ni == max(TDF_Mesa$ni)], 2), collapse = ", "),
Varianza = var(Variable),
Desv_Std = sd_val,
CV_Porc = (sd_val / abs(media_val)) * 100,
Asimetria = skewness(Variable, type = 2),
Curtosis = kurtosis(Variable, type = 2),
Atipicos = status_atipicos
)
df_resumen %>%
gt() %>%
tab_header(title = md("**CONCLUSIONES Y ESTADÍSTICOS**"), subtitle = "Variable: MESA_ROTATIVA") %>%
fmt_number(columns = c(Media, Mediana, Varianza, Desv_Std, CV_Porc, Curtosis), decimals = 2) %>%
fmt_number(columns = Asimetria, decimals = 4) %>%
tab_options(column_labels.background.color = "#2E4053") %>%
tab_style(style = list(cell_text(weight = "bold", color = "white")), locations = cells_column_labels())
| CONCLUSIONES Y ESTADÍSTICOS | ||||||||||
| Variable: MESA_ROTATIVA | ||||||||||
| Variable | Rango | Media | Mediana | Moda | Varianza | Desv_Std | CV_Porc | Asimetria | Curtosis | Atipicos |
|---|---|---|---|---|---|---|---|---|---|---|
| Mesa Rotativa (m) | [0; 994] | 56.16 | 32.00 | 33.13 | 4,829.68 | 69.50 | 123.76 | 5.4490 | 49.24 | 1269 [164.79; 994] |
min_txt <- format(min(Variable), scientific = FALSE)
max_txt <- format(max(Variable), scientific = FALSE)
asimetria_val <- skewness(Variable, type = 2)
centro_valor <- format(round(if(abs(asimetria_val) > 0.5) median(Variable) else mean(Variable), 2), scientific = FALSE)
cv_calc <- (sd(Variable) / abs(mean(Variable))) * 100
tipo_homogeneidad <- if(cv_calc > 30) "heterogénea" else "homogénea"
donde_se_concentra <- if(asimetria_val > 0) "valores bajos" else "valores altos"
juicio_operativo <- if(median(Variable) < 300) "dentro del estándar operacional" else "requiere equipos de mayor capacidad"
cat(paste0(
"## Análisis de Mesa Rotativa\n\n",
"La variable **Mesa Rotativa** oscila entre **", min_txt, "** y **", max_txt, "** metros. ",
"El centro de la distribución se localiza en **", centro_valor, "** metros. ",
"La muestra se define como una variable **", tipo_homogeneidad, "** (CV: ", round(cv_calc, 2), "%), ",
"presentando una mayor densidad en los **", donde_se_concentra, "** de la escala. ",
"Desde el punto de vista de ingeniería, estos valores se consideran **", juicio_operativo, "** para las operaciones de perforación."
))
La variable Mesa Rotativa oscila entre 0 y 994 metros. El centro de la distribución se localiza en 32 metros. La muestra se define como una variable heterogénea (CV: 123.76%), presentando una mayor densidad en los valores bajos de la escala. Desde el punto de vista de ingeniería, estos valores se consideran dentro del estándar operacional para las operaciones de perforación.