Latitud Base de los Pozos Petroleros
# 1. LIBRERÍAS Y CARGA DE DATOS
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gt)
library(e1071)
#
# Point the session at the project folder that holds the exported well data.
# NOTE(review): hard-coded absolute path; the script only runs on a machine
# that has this exact folder.
setwd("C:/Users/Usuario/Desktop/TRABAJO DE ESTADISTICA/PDF-EXCEL-QGIS")

# Read the raw table: semicolon-separated fields, "." as decimal mark,
# file decoded as Latin-1.
Datos_Brutos <- read.csv(
  file = "Pozos Brasil 2.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  fileEncoding = "LATIN1"
)

# Strip leading/trailing whitespace from the column headers, then list them.
names(Datos_Brutos) <- trimws(names(Datos_Brutos))
names(Datos_Brutos)
## [1] "POCO" "CADASTRO"
## [3] "OPERADOR" "POCO_OPERADOR"
## [5] "ESTADO" "BACIA"
## [7] "BLOCO" "SIG_CAMPO"
## [9] "CAMPO" "TERRA_MAR"
## [11] "POCO_POS_ANP" "TIPO"
## [13] "CATEGORIA" "RECLASSIFICACAO"
## [15] "SITUACAO" "INICIO"
## [17] "TERMINO" "CONCLUSAO"
## [19] "TITULARIDADE" "LATITUDE_BASE_4C"
## [21] "LONGITUDE_BASE_4C" "LATITUDE_BASE_DD"
## [23] "LONGITUDE_BASE_DD" "DATUM_HORIZONTAL"
## [25] "TIPO_DE_COORDENADA_DE_BASE" "DIRECAO"
## [27] "PROFUNDIDADE_VERTICAL_M" "PROFUNDIDADE_SONDADOR_M"
## [29] "PROFUNDIDADE_MEDIDA_M" "REFERENCIA_DE_PROFUNDIDADE"
## [31] "MESA_ROTATIVA" "COTA_ALTIMETRICA_M"
## [33] "LAMINA_D_AGUA_M" "DATUM_VERTICAL"
## [35] "UNIDADE_ESTRATIGRAFICA" "GEOLOGIA_GRUPO_FINAL"
## [37] "GEOLOGIA_FORMACAO_FINAL" "GEOLOGIA_MEMBRO_FINAL"
## [39] "CDPE" "AGP"
## [41] "PC" "PAG"
## [43] "PERFIS_CONVENCIONAIS" "DURANTE_PERFURACAO"
## [45] "PERFIS_DIGITAIS" "PERFIS_PROCESSADOS"
## [47] "PERFIS_ESPECIAIS" "AMOSTRA_LATERAL"
## [49] "SISMICA" "TABELA_TEMPO_PROFUNDIDADE"
## [51] "DADOS_DIRECIONAIS" "TESTE_A_CABO"
## [53] "TESTE_DE_FORMACAO" "CANHONEIO"
## [55] "TESTEMUNHO" "GEOQUIMICA"
## [57] "SIG_SONDA" "NOM_SONDA"
## [59] "DHA_ATUALIZACAO"
# Keep the well identifier and the base latitude column (when present) and
# add a numeric copy of the latitude; any "," is turned into "." before
# parsing.
Datos <- Datos_Brutos %>%
  select(any_of(c("POCO", "LATITUDE_BASE_DD"))) %>%
  mutate(
    Variable_Analisis = as.numeric(gsub(",", ".", LATITUDE_BASE_DD))
  )

# Analysis vector: non-missing latitudes strictly inside (-35, 10).
latitudes <- Datos$Variable_Analisis
dentro_de_rango <- !is.na(latitudes) & latitudes > -35 & latitudes < 10
Variable <- latitudes[dentro_de_rango]
head(Variable)
## [1] -21.96268 -23.10267 -21.13522 -21.13682 -21.13994 -12.13336
# 2. FREQUENCY-TABLE CALCULATIONS
# Builds K equal-width classes over the observed range of `Variable` and
# derives absolute (ni), relative (hi, in %) and cumulative frequencies in
# ascending and descending order. The result is stored in `TDF_Latitud`.
N <- length(Variable)
if (N == 0) {
  # Without this guard log10(0) makes K = -Inf and seq() fails with an
  # unrelated-looking error.
  stop(
    "La variable no contiene observaciones; no se puede construir la tabla de frecuencias.",
    call. = FALSE
  )
}
min_val <- min(Variable)
max_val <- max(Variable)
Rango <- max_val - min_val

# Number of classes from the rule 1 + 3.322 * log10(N), rounded down.
K <- floor(1 + 3.322 * log10(N))
Amplitud <- Rango / K

# K + 1 equally spaced limits; the last one is nudged above the maximum so
# that the maximum itself falls inside the last half-open class.
breaks_table <- seq(min_val, max_val, length.out = K + 1)
breaks_table[length(breaks_table)] <- max_val + 0.0001
lim_inf_table <- breaks_table[seq_len(K)]
lim_sup_table <- breaks_table[seq_len(K) + 1]
MC <- (lim_inf_table + lim_sup_table) / 2

# Class counts. findInterval() assigns each value to the class i with
# breaks_table[i] <= x < breaks_table[i + 1]; tabulate() counts per class and
# keeps empty classes as zeros. as.numeric() keeps `ni` a double vector, as
# before.
clase <- findInterval(Variable, breaks_table)
ni <- as.numeric(tabulate(clase, nbins = K))

# Relative and cumulative frequencies.
hi <- (ni / sum(ni)) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))

TDF_Latitud <- data.frame(
  Li = round(lim_inf_table, 4),
  Ls = round(lim_sup_table, 4),
  MC = round(MC, 4),
  ni = ni,
  hi = round(hi, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc, 2),
  Hi_desc = round(Hi_desc, 2)
)
# Totals row: only the ni and hi columns carry a sum, the rest show "-".
totales <- c("TOTAL", "-", "-", sum(ni), round(sum(hi), 2), "-", "-", "-", "-")

# Every column becomes text so the totals row can be appended underneath.
TDF_Char <- mutate(TDF_Latitud, across(everything(), as.character))
TDF_Final <- rbind(TDF_Char, totales)

# Cell styles reused by the table below.
estilo_titulo <- list(
  cell_fill(color = "#2E4053"),
  cell_text(color = "white", weight = "bold")
)
estilo_encabezados <- list(
  cell_fill(color = "#F2F3F4"),
  cell_text(weight = "bold", color = "#2E4053")
)

# Assemble the formatted frequency table step by step.
tabla_frecuencias <- TDF_Final %>%
  gt() %>%
  tab_header(
    title = md("**DISTRIBUCIÓN DE FRECUENCIAS DE POZOS PETROLEROS DE BRASIL**"),
    subtitle = md("**Variable: LATITUD BASE**")
  ) %>%
  tab_source_note(source_note = "Fuente: Datos ANP 2018")

tabla_frecuencias <- tabla_frecuencias %>%
  cols_label(
    Li = "Lim Inf",
    Ls = "Lim Sup",
    MC = "Marca Clase (Xi)",
    ni = "ni",
    hi = "hi (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi (Asc)",
    Hi_desc = "Hi (Desc)"
  ) %>%
  cols_align(align = "center", columns = everything())

tabla_frecuencias <- tabla_frecuencias %>%
  tab_style(style = estilo_titulo, locations = cells_title()) %>%
  tab_style(style = estilo_encabezados, locations = cells_column_labels()) %>%
  tab_options(
    table.border.top.color = "#2E4053",
    table.border.bottom.color = "#2E4053",
    column_labels.border.bottom.color = "#2E4053",
    data_row.padding = px(6)
  )

# Print the finished table.
tabla_frecuencias
| DISTRIBUCIÓN DE FRECUENCIAS DE POZOS PETROLEROS DE BRASIL | ||||||||
| Variable: LATITUD BASE | ||||||||
| Lim Inf | Lim Sup | Marca Clase (Xi) | ni | hi (%) | Ni (Asc) | Ni (Desc) | Hi (Asc) | Hi (Desc) |
|---|---|---|---|---|---|---|---|---|
| -32.9266 | -30.4296 | -31.6781 | 12 | 0.04 | 12 | 29575 | 0.04 | 100 |
| -30.4296 | -27.9326 | -29.1811 | 18 | 0.06 | 30 | 29563 | 0.1 | 99.96 |
| -27.9326 | -25.4357 | -26.6841 | 303 | 1.02 | 333 | 29545 | 1.13 | 99.9 |
| -25.4357 | -22.9387 | -24.1872 | 877 | 2.97 | 1210 | 29242 | 4.09 | 98.87 |
| -22.9387 | -20.4417 | -21.6902 | 3079 | 10.41 | 4289 | 28365 | 14.5 | 95.91 |
| -20.4417 | -17.9447 | -19.1932 | 2029 | 6.86 | 6318 | 25286 | 21.36 | 85.5 |
| -17.9447 | -15.4478 | -16.6963 | 135 | 0.46 | 6453 | 23257 | 21.82 | 78.64 |
| -15.4478 | -12.9508 | -14.1993 | 163 | 0.55 | 6616 | 23122 | 22.37 | 78.18 |
| -12.9508 | -10.4538 | -11.7023 | 11444 | 38.69 | 18060 | 22959 | 61.07 | 77.63 |
| -10.4538 | -7.9569 | -9.2053 | 1051 | 3.55 | 19111 | 11515 | 64.62 | 38.93 |
| -7.9569 | -5.4599 | -6.7084 | 611 | 2.07 | 19722 | 10464 | 66.68 | 35.38 |
| -5.4599 | -2.9629 | -4.2114 | 9444 | 31.93 | 29166 | 9853 | 98.62 | 33.32 |
| -2.9629 | -0.4659 | -1.7144 | 274 | 0.93 | 29440 | 409 | 99.54 | 1.38 |
| -0.4659 | 2.031 | 0.7825 | 52 | 0.18 | 29492 | 135 | 99.72 | 0.46 |
| 2.031 | 4.5281 | 3.2796 | 83 | 0.28 | 29575 | 83 | 100 | 0.28 |
| TOTAL | - | - | 29575 | 100 | - | - | - | - |
| Fuente: Datos ANP 2018 | ||||||||
# Colours shared by the charts.
col_gris_azulado <- "#5D6D7E"
col_ejes <- "#2E4053"

# Bin the data once with the "Sturges" option, without drawing; the
# histogram charts reuse this object.
h_base <- hist(Variable, breaks = "Sturges", plot = FALSE)

# CHART 1: absolute-frequency histogram, y axis scaled to the tallest bar
# plus 10 % headroom.
par(mar = c(8, 5, 4, 2))
marcas_x <- round(h_base$breaks, 0)
tope_y <- max(h_base$counts) * 1.1
plot(
  h_base,
  main = "Gráfica No.1: Distribución de Latitud Base",
  xlab = "Latitud Base",
  ylab = "Frecuencia Absoluta",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  ylim = c(0, tope_y)
)
# Axes are drawn by hand: x ticks at the rounded bin limits, labels vertical.
axis(
  side = 1,
  at = marcas_x,
  labels = format(marcas_x, scientific = FALSE),
  las = 2,
  cex.axis = 0.7
)
axis(side = 2)
# Horizontal reference lines only.
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")
# CHART 2: same histogram, with the y axis running up to the total number of
# observations so each bar is seen against the whole data set.
par(mar = c(8, 5, 4, 2))
marcas_x <- round(h_base$breaks, 0)
total_pozos <- sum(h_base$counts)
plot(
  h_base,
  main = "Gráfica N°2: Distribución de Latitud Base",
  xlab = "Latitud Base",
  ylab = "Total Pozos",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  ylim = c(0, total_pozos)
)
# Hand-drawn axes: x ticks at the rounded bin limits, labels vertical.
axis(
  side = 1,
  at = marcas_x,
  labels = format(marcas_x, scientific = FALSE),
  las = 2,
  cex.axis = 0.7
)
axis(side = 2)
# Horizontal reference lines only.
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")
# Percentage version of the histogram object: counts (and density) are
# overwritten with each bin's share of the total, in percent.
h_porc <- h_base
proporcion <- h_base$counts / sum(h_base$counts)
h_porc$counts <- proporcion * 100
h_porc$density <- h_porc$counts

# CHART 3: percentage histogram, y axis scaled to the tallest bar plus 20 %
# headroom so the labels above the bars fit.
par(mar = c(8, 5, 4, 2))
marcas_x <- round(h_base$breaks, 0)
etiquetas_porc <- paste0(round(h_porc$counts, 1), "%")
plot(
  h_porc,
  main = "Gráfica N°3: Distribución Porcentual de Latitud Base",
  xlab = "Latitud Base",
  ylab = "Porcentaje (%)",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  freq = TRUE,
  ylim = c(0, max(h_porc$counts) * 1.2)
)
axis(
  side = 1,
  at = marcas_x,
  labels = format(marcas_x, scientific = FALSE),
  las = 2,
  cex.axis = 0.7
)
axis(side = 2)
# Percentage label just above each bar.
text(
  x = h_base$mids,
  y = h_porc$counts,
  label = etiquetas_porc,
  pos = 3,
  cex = 0.6,
  col = col_ejes
)
# Horizontal reference lines only.
grid(nx = NA, ny = NULL, col = "#D7DBDD", lty = "dotted")
# CHART 4: percentage histogram on a fixed 0-100 % scale.
par(mar = c(8, 5, 4, 2))
marcas_x <- round(h_base$breaks, 0)
etiquetas_porc <- paste0(round(h_porc$counts, 1), "%")
plot(
  h_porc,
  main = "Gráfica No.4: Distribución Porcentual de Latitud Base",
  xlab = "Latitud Base",
  ylab = "% del Total",
  col = col_gris_azulado,
  border = "white",
  axes = FALSE,
  freq = TRUE,
  ylim = c(0, 100)
)
axis(
  side = 1,
  at = marcas_x,
  labels = format(marcas_x, scientific = FALSE),
  las = 2,
  cex.axis = 0.7
)
# Percentage label just above each bar.
text(
  x = h_base$mids,
  y = h_porc$counts,
  label = etiquetas_porc,
  pos = 3,
  cex = 0.6,
  col = col_ejes
)
axis(side = 2)
# Dotted reference line every 20 percentage points.
abline(h = seq(0, 100, 20), col = "#D7DBDD", lty = "dotted")
# CHART 5: horizontal boxplot of the analysed variable, outliers drawn as
# solid red points.
par(mar = c(5, 5, 4, 2))
boxplot(
  Variable,
  horizontal = TRUE,
  col = col_gris_azulado,
  main = "Gráfica No.5: Diagrama de Caja de Latitud Base (Boxplot)",
  # The axis label previously read "Profundidad Vertical (m)", left over from
  # another variable; the data plotted here is the base latitude.
  xlab = "Latitud Base",
  outline = TRUE,
  outpch = 19,
  outcol = "#C0392B",
  boxwex = 0.5,
  frame.plot = FALSE,
  xaxt = "n"
)
# The default x axis is suppressed above and replaced by a denser one with
# vertical labels.
eje_x_detallado <- pretty(Variable, n = 20)
axis(
  1,
  at = eje_x_detallado,
  labels = format(eje_x_detallado, scientific = FALSE),
  cex.axis = 0.7,
  las = 2
)
# Vertical reference lines only.
grid(nx = NULL, ny = NA, col = "lightgray", lty = "dotted")
# CHART 6: ascending and descending ogives (cumulative absolute frequency
# against the class limits of the frequency table).
# A wide right margin leaves room for the legend outside the plot area.
# `xpd` is deliberately NOT set through par(): a global xpd = TRUE would also
# stop grid() from being clipped to the plot region and would stay active for
# any later plot. It is passed to legend() only.
par(mar = c(5, 5, 4, 8))

# Both curves share the class limits as x coordinates. The ascending curve
# starts at 0 on the first limit; the descending curve ends at 0 on the last.
x_asc <- c(min(breaks_table), breaks_table[2:length(breaks_table)])
y_asc <- c(0, Ni_asc)
x_desc <- c(min(breaks_table), breaks_table[2:length(breaks_table)])
y_desc <- c(Ni_desc, 0)
x_range <- range(c(x_asc, x_desc))
y_range <- c(0, max(c(y_asc, y_desc)))
col_azul <- "#2E4053"
col_rojo <- "#C0392B"

plot(
  x_asc, y_asc,
  type = "o", col = col_azul, lwd = 2, pch = 19,
  main = "Gráfica No.6: Ojivas Ascendente y Descendente de Latitud Base",
  xlab = "Latitud Base", ylab = "Frecuencia acumulada",
  xlim = x_range, ylim = y_range, axes = FALSE, frame.plot = FALSE
)
axis(
  1,
  at = round(breaks_table, 0),
  labels = format(round(breaks_table, 0), scientific = FALSE),
  las = 2,
  cex.axis = 0.6
)
axis(2, at = pretty(y_asc), labels = format(pretty(y_asc), scientific = FALSE))
lines(x_desc, y_desc, type = "o", col = col_rojo, lwd = 2, pch = 19)
# The negative inset pushes the legend into the right margin; xpd = TRUE is
# applied only while the legend is drawn.
legend(
  "right",
  legend = c("Ascendente", "Descendente"),
  col = c(col_azul, col_rojo), lty = 1, pch = 19, cex = 0.7, lwd = 2,
  inset = c(-0.15, 0), bty = "n",
  xpd = TRUE
)
grid()
# INDICATOR CALCULATIONS
# Central tendency.
media_val <- mean(Variable)
mediana_val <- median(Variable)

# Mode taken from the grouped table: class mark(s) of the class(es) with the
# highest absolute frequency, joined into one text value.
freq_max <- max(TDF_Latitud$ni)
es_clase_modal <- TDF_Latitud$ni == freq_max
modas_calc <- TDF_Latitud$MC[es_clase_modal]
moda_txt <- paste(round(modas_calc, 2), collapse = ", ")

# Observed range as "[min; max]" text.
extremos <- round(range(Variable), 2)
rango_txt <- paste0("[", extremos[1], "; ", extremos[2], "]")

# Dispersion; the coefficient of variation uses the absolute value of the mean.
varianza_val <- var(Variable)
sd_val <- sd(Variable)
cv_val <- (sd_val / abs(media_val)) * 100

# Shape, computed with the e1071 functions using their `type = 2` option.
asimetria_val <- skewness(Variable, type = 2)
curtosis_val <- kurtosis(Variable, type = 2)

# Values reported as outliers by boxplot.stats(), summarised as
# "count [min; max]" or a fixed text when there are none.
vals_atipicos <- boxplot.stats(Variable)$out
num_atipicos <- length(vals_atipicos)
if (num_atipicos > 0) {
  min_out <- min(vals_atipicos)
  max_out <- max(vals_atipicos)
  status_atipicos <- paste0(
    num_atipicos, " [", round(min_out, 2), "; ", round(max_out, 2), "]"
  )
} else {
  status_atipicos <- "0 (Sin atípicos)"
}
# One-row summary of the indicators computed above, ready to be rendered as a
# table. The "Variable" cell previously read "Profundidad Vertical (m)", left
# over from another variable; every statistic here describes the base
# latitude.
df_resumen <- data.frame(
  "Variable" = "Latitud Base",
  "Rango" = rango_txt,
  "Media" = media_val,
  "Mediana" = mediana_val,
  "Moda" = moda_txt,
  "Varianza" = varianza_val,
  "Desv_Std" = sd_val,
  "CV_Porc" = cv_val,
  "Asimetria" = asimetria_val,
  "Curtosis" = curtosis_val,
  "Atipicos" = status_atipicos
)
# Render the indicator summary as a formatted table.
# The subtitle previously named "Profundidad Vertical", left over from another
# variable; this report summarises the base latitude.
df_resumen %>%
  gt() %>%
  tab_header(
    title = md("**CONCLUSIONES Y ESTADÍSTICOS**"),
    subtitle = "Resumen de Indicadores de Latitud Base de los Pozos Petrolíferos en Brasil"
  ) %>%
  tab_source_note(source_note = "Autor: Grupo 3") %>%
  # Two decimals for most indicators, four for the skewness coefficient.
  fmt_number(
    columns = c(Media, Mediana, Varianza, Desv_Std, CV_Porc, Curtosis),
    decimals = 2
  ) %>%
  fmt_number(columns = c(Asimetria), decimals = 4) %>%
  cols_label(
    Variable = "Variable",
    Rango = "Rango Total",
    Media = "Media (X̄)",
    Mediana = "Mediana (Me)",
    Moda = "Moda (Mo)",
    Varianza = "Varianza (S²)",
    Desv_Std = "Desv. Est. (S)",
    CV_Porc = "C.V. (%)",
    Asimetria = "Asimetría (As)",
    Curtosis = "Curtosis (K)",
    Atipicos = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    column_labels.background.color = "#2E4053",
    table.border.top.color = "black",
    table.border.bottom.color = "#2E4053",
    column_labels.border.bottom.color = "#2E4053",
    data_row.padding = px(8)
  ) %>%
  # White bold text on the dark header background set above.
  tab_style(
    style = list(cell_text(weight = "bold", color = "white")),
    locations = cells_column_labels()
  )
| CONCLUSIONES Y ESTADÍSTICOS | ||||||||||
| Resumen de Indicadores de Latitud Base de los Pozos Petrolíferos en Brasil | ||||||||||
| Variable | Rango Total | Media (X̄) | Mediana (Me) | Moda (Mo) | Varianza (S²) | Desv. Est. (S) | C.V. (%) | Asimetría (As) | Curtosis (K) | Outliers [Intervalo] |
|---|---|---|---|---|---|---|---|---|---|---|
| Latitud Base | [-32.93; 4.53] | −11.41 | −10.71 | -11.7 | 39.50 | 6.28 | 55.06 | −0.6504 | −0.51 | 810 [-32.93; -23.71] |
| Autor: Grupo 3 | ||||||||||
La variable Latitud Base fluctúa entre –32.93 y 4.53, y sus valores se encuentran en torno a –11 (media = –11.41; mediana = –10.71), con una desviación estándar de 6.28. Es un conjunto de datos altamente heterogéneo (CV = 55.06 %). La asimetría negativa (–0.6504; media menor que la mediana) indica que los valores se concentran hacia las latitudes más altas (menos negativas), con una cola que se extiende hacia las latitudes más bajas, donde se ubican los valores atípicos (desde –32.93 hasta –23.71). Por lo tanto, la variable se comporta de forma dispersa y con asimetría negativa.