El presente informe estadístico analiza la variable Profundidad Vertical de pozos petroleros de Brasil.
# ==============================================================================
# 1. LIBRERÍAS Y CARGA DE DATOS
# ==============================================================================
if(!require(readxl)) install.packages("readxl")
if(!require(dplyr)) install.packages("dplyr")
if(!require(gt)) install.packages("gt")
if(!require(e1071)) install.packages("e1071")
library(readxl)
library(dplyr)
library(gt)
library(e1071)
# Carga de datos
Datos_Brutos <- read_excel("tabela_de_pocos_janeiro_2018.xlsx", sheet = 1)
# Selección y Limpieza
vars_to_keep <- c("POCO", "OPERADOR", "ESTADO", "BACIA", "DIRECAO",
"TIPO", "SITUACAO", "PROFUNDIDADE_VERTICAL_M", "LAMINA_D_AGUA_M")
Datos <- Datos_Brutos %>%
select(any_of(vars_to_keep)) %>%
mutate(PROFUNDIDADE_VERTICAL_M = as.numeric(gsub(",", ".", as.character(PROFUNDIDADE_VERTICAL_M))))
Variable <- na.omit(Datos$PROFUNDIDADE_VERTICAL_M)
Variable <- Variable[Variable > 0]
# ==============================================================================
# 2. CÁLCULOS MATEMÁTICOS (STURGES)
# ==============================================================================
N <- length(Variable)
if(N == 0) stop("ERROR CRÍTICO: No hay datos válidos.")
min_val <- min(Variable)
max_val <- max(Variable)
Rango <- max_val - min_val
K <- floor(1 + 3.322 * log10(N))
Amplitud <- Rango / K
breaks_raw <- seq(min_val, max_val, length.out = K + 1)
breaks_raw[length(breaks_raw)] <- max_val + 0.0001
lim_inf_raw <- breaks_raw[1:K]
lim_sup_raw <- breaks_raw[2:(K+1)]
MC <- (lim_inf_raw + lim_sup_raw) / 2
# Cálculo de Frecuencias
ni <- numeric(K)
for (i in 1:K) {
if (i < K) {
ni[i] <- length(Variable[Variable >= lim_inf_raw[i] & Variable < lim_sup_raw[i]])
} else {
ni[i] <- length(Variable[Variable >= lim_inf_raw[i] & Variable <= lim_sup_raw[i]])
}
}
hi <- (ni / sum(ni)) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))
# Dataframe
TDF_Profundidad <- data.frame(
Li = round(lim_inf_raw, 2),
Ls = round(lim_sup_raw, 2),
MC = round(MC, 2),
ni = ni,
hi = round(hi, 2),
Ni_asc = Ni_asc,
Ni_desc = Ni_desc,
Hi_asc = round(Hi_asc, 2),
Hi_desc = round(Hi_desc, 2)
)A continuación se presenta la tabla de distribución de frecuencias obtenida.
# Totales
totales <- c("TOTAL", "-", "-", sum(ni), round(sum(hi), 2), "-", "-", "-", "-")
TDF_Char <- TDF_Profundidad %>% mutate(across(everything(), as.character))
TDF_Final <- rbind(TDF_Char, totales)
TDF_Final %>%
gt() %>%
tab_header(
title = md("**DISTRIBUCIÓN DE FRECUENCIAS**"),
subtitle = md("Variable: **Profundidad Vertical (m)**")
) %>%
tab_source_note(source_note = "Fuente: Datos ANP 2018") %>%
cols_label(
Li = "Lím. Inf", Ls = "Lím. Sup", MC = "Marca Clase (Xi)",
ni = "ni", hi = "hi (%)",
Ni_asc = "Ni (Asc)", Ni_desc = "Ni (Desc)",
Hi_asc = "Hi (Asc)", Hi_desc = "Hi (Desc)"
) %>%
cols_align(align = "center", columns = everything()) %>%
tab_style(
style = list(cell_fill(color = "#2E4053"), cell_text(color = "white", weight = "bold")),
locations = cells_title()
) %>%
tab_style(
style = list(cell_fill(color = "#F2F3F4"), cell_text(weight = "bold", color = "#2E4053")),
locations = cells_column_labels()
) %>%
tab_options(
table.border.top.color = "#2E4053",
table.border.bottom.color = "#2E4053",
column_labels.border.bottom.color = "#2E4053",
data_row.padding = px(6)
)| DISTRIBUCIÓN DE FRECUENCIAS | ||||||||
| Variable: Profundidad Vertical (m) | ||||||||
| Lím. Inf | Lím. Sup | Marca Clase (Xi) | ni | hi (%) | Ni (Asc) | Ni (Desc) | Hi (Asc) | Hi (Desc) |
|---|---|---|---|---|---|---|---|---|
| 4 | 2660.33 | 1332.17 | 1825 | 74.01 | 1825 | 2466 | 74.01 | 100 |
| 2660.33 | 5316.67 | 3988.5 | 566 | 22.95 | 2391 | 641 | 96.96 | 25.99 |
| 5316.67 | 7973 | 6644.83 | 73 | 2.96 | 2464 | 75 | 99.92 | 3.04 |
| 7973 | 10629.33 | 9301.17 | 0 | 0 | 2464 | 2 | 99.92 | 0.08 |
| 10629.33 | 13285.67 | 11957.5 | 0 | 0 | 2464 | 2 | 99.92 | 0.08 |
| 13285.67 | 15942 | 14613.83 | 0 | 0 | 2464 | 2 | 99.92 | 0.08 |
| 15942 | 18598.33 | 17270.17 | 0 | 0 | 2464 | 2 | 99.92 | 0.08 |
| 18598.33 | 21254.67 | 19926.5 | 0 | 0 | 2464 | 2 | 99.92 | 0.08 |
| 21254.67 | 23911 | 22582.83 | 0 | 0 | 2464 | 2 | 99.92 | 0.08 |
| 23911 | 26567.33 | 25239.17 | 0 | 0 | 2464 | 2 | 99.92 | 0.08 |
| 26567.33 | 29223.67 | 27895.5 | 1 | 0.04 | 2465 | 2 | 99.96 | 0.08 |
| 29223.67 | 31880 | 30551.83 | 1 | 0.04 | 2466 | 1 | 100 | 0.04 |
| TOTAL | - | - | 2466 | 100 | - | - | - | - |
| Fuente: Datos ANP 2018 | ||||||||
Esta sección presenta la visualización de la distribución de los datos.
col_gris_azulado <- "#5D6D7E"
col_ejes <- "#2E4053"
h_base <- hist(Variable, breaks = breaks_raw, right = FALSE, plot = FALSE)
# --- GRÁFICO 1: Histograma Absoluto (Local) ---
par(mar = c(8, 5, 4, 2))
plot(h_base,
main = "Gráfica No.1: Distribución de Profundidad Vertical (Local)",
xlab = "Profundidad Vertical (m)",
ylab = "Frecuencia Absoluta",
col = col_gris_azulado, border = "white", axes = FALSE,
ylim = c(0, max(ni) * 1.1))
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
grid(nx=NA, ny=NULL, col="#D7DBDD", lty="dotted") # --- GRÁFICO 2: Histograma Global ---
par(mar = c(8, 5, 4, 2))
plot(h_base,
main = "Gráfica N°2: Distribución de Profundidad Vertical (Global)",
xlab = "Profundidad Vertical (m)",
ylab = "Total Pozos",
col = col_gris_azulado, border = "white", axes = FALSE,
ylim = c(0, sum(ni)))
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
grid(nx=NA, ny=NULL, col="#D7DBDD", lty="dotted")h_porc <- h_base
h_porc$counts <- hi
h_porc$density <- hi
# --- GRÁFICO 3: Porcentajes (Local) ---
par(mar = c(8, 5, 4, 2))
plot(h_porc,
main = "Gráfica N°3: Distribución Porcentual (Local)",
xlab = "Profundidad Vertical (m)",
ylab = "Porcentaje (%)",
col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
ylim = c(0, max(hi)*1.2))
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
text(x = h_base$mids, y = hi, label = paste0(round(hi, 1), "%"), pos = 3, cex = 0.6, col = col_ejes)
grid(nx=NA, ny=NULL, col="#D7DBDD", lty="dotted") # --- GRÁFICO 4: Global Porcentual ---
par(mar = c(8, 5, 4, 2))
plot(h_porc,
main = "Gráfica No.4: Distribución Porcentual (Global)",
xlab = "Profundidad Vertical (m)",
ylab = "% del Total",
col = col_gris_azulado, border = "white", axes = FALSE, freq = TRUE,
ylim = c(0, 100))
axis(1, at = round(breaks_raw, 0), labels = format(round(breaks_raw, 0), scientific = FALSE), las = 2, cex.axis = 0.7)
axis(2)
abline(h=seq(0,100,20), col="#D7DBDD", lty="dotted")# --- GRÁFICO 5: Boxplot ---
par(mar = c(5, 5, 4, 2))
boxplot(Variable, horizontal = TRUE, col = col_gris_azulado,
main = "Gráfica No.5: Diagrama de Caja (Boxplot)",
xlab = "Profundidad (m)", outline = TRUE, outpch = 19, outcol = "#C0392B",
boxwex = 0.5, frame.plot = FALSE, xaxt = "n")
eje_x_detallado <- pretty(Variable, n = 20)
axis(1, at = eje_x_detallado, labels = format(eje_x_detallado, scientific = FALSE), cex.axis=0.7, las=2)
grid(nx=NULL, ny=NA, col="lightgray", lty="dotted")# --- GRÁFICO 6: Ojivas ---
par(mar = c(5, 5, 4, 8), xpd = TRUE)
x_asc <- c(min(breaks_raw), breaks_raw[2:length(breaks_raw)])
y_asc <- c(0, Ni_asc)
x_desc <- c(breaks_raw[1:(length(breaks_raw)-1)], max(breaks_raw))
y_desc <- c(Ni_desc, 0)
if(length(x_desc) != length(y_desc)) {
x_desc <- c(TDF_Profundidad$Li, max(TDF_Profundidad$Ls))
y_desc <- c(TDF_Profundidad$Ni_desc, 0)
}
x_range <- range(c(x_asc, x_desc))
y_range <- c(0, max(c(y_asc, y_desc)))
col_azul <- "#2E4053"
col_rojo <- "#C0392B"
plot(x_asc, y_asc, type = "o", col = col_azul, lwd=2, pch=19,
main = "Gráfica No.6: Ojivas Ascendente y Descendente",
xlab = "Profundidad (m)", ylab = "Frecuencia acumulada",
xlim = x_range, ylim = y_range, axes = FALSE, frame.plot = FALSE)
axis(1, at = round(breaks_raw,0), labels = format(round(breaks_raw,0), scientific = FALSE), las=2, cex.axis=0.6)
axis(2, at = pretty(y_asc), labels = format(pretty(y_asc), scientific = FALSE))
lines(x_desc, y_desc, type = "o", col = col_rojo, lwd=2, pch=19)
legend("right", legend = c("Ascendente", "Descendente"),
col = c(col_azul, col_rojo), lty = 1, pch = 19, cex = 0.7, lwd=2,
inset = c(-0.3, 0), bty="n")
grid()tabla_ind <- data.frame(
Variable = "Profundidad Vertical(m)",
Media = round(mean(Variable), 2),
Mediana = round(median(Variable), 2),
Moda = paste(round(TDF_Profundidad$MC[TDF_Profundidad$ni == max(TDF_Profundidad$ni)], 2), collapse=", "),
Varianza = round(var(Variable), 2),
Desv_Est = round(sd(Variable), 2),
CV_Porc = round((sd(Variable)/mean(Variable))*100, 2),
Asimetria = round(skewness(Variable), 4),
Curtosis = round(kurtosis(Variable), 2)
)
gt(tabla_ind) %>%
tab_header(
title = md("**RESUMEN ESTADÍSTICO**"),
subtitle = md("Variable: **Profundidad Vertical (m)**")
) %>%
fmt_number(columns = 2:9, decimals = 2) %>%
tab_options(
column_labels.background.color = "#2E4053",
table.border.top.color = "black",
table.border.bottom.color = "#2E4053",
column_labels.border.bottom.color = "#2E4053",
data_row.padding = px(6)
) %>%
tab_style(
style = list(cell_text(weight = "bold", color = "white")),
locations = cells_column_labels()
)| RESUMEN ESTADÍSTICO | ||||||||
| Variable: Profundidad Vertical (m) | ||||||||
| Variable | Media | Mediana | Moda | Varianza | Desv_Est | CV_Porc | Asimetria | Curtosis |
|---|---|---|---|---|---|---|---|---|
| Profundidad Vertical(m) | 1,798.48 | 1,304.10 | 1332.17 | 2,594,022.17 | 1,610.60 | 89.55 | 4.90 | 72.20 |