API_BE

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: LISS MURILLO ####
### CARRERA: INGENIERÍA EN PETROLEOS #####

#### VARIABLE API_BE INFERENCIAL  ####
## DATOS ###
library(readxl)
datos <- read_excel("r-graficas/Producción Campo Sacha.csv.xlsx")

## New names:
## • `` -> `...16`

View(datos)
str(datos)

## tibble [8,344 × 31] (S3: tbl_df/tbl/data.frame)
##  $ mes                   : chr [1:8344] "Ene" "Ene" "Ene" "Ene" ...
##  $ día                   : num [1:8344] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Pozo                  : chr [1:8344] "SACHA-001A" "SACHA-019A" "SACHA-052B" "SACHA-083A" ...
##  $ Campo                 : chr [1:8344] "SACHA" "SACHA" "SACHA" "SACHA" ...
##  $ Reservorio            : chr [1:8344] "U" "U" "U INFERIOR" "HOLLIN INFERIOR" ...
##  $ Bpd                   : num [1:8344] NA 53 249 139 186 136 NA 456 161 164 ...
##  $ Bppd_BH               : num [1:8344] 159 NA NA NA NA NA 155 NA NA NA ...
##  $ Bfpd_BE               : num [1:8344] NA 534 346 1158 1163 ...
##  $ Bfpd_BH               : num [1:8344] 695 NA NA NA NA NA 441 NA NA NA ...
##  $ Bapd_BE               : num [1:8344] NA 481 97 1019 977 ...
##  $ Bapd_BH               : num [1:8344] 536 NA NA NA NA NA 286 NA NA NA ...
##  $ Bsw_BE                : num [1:8344] NA 90.1 28 88 84 ...
##  $ Bsw_BH                : num [1:8344] 77.1 NA NA NA NA ...
##  $ Api_BE                : num [1:8344] NA 26.7 27.8 27.7 24 20.5 NA 28.5 29.9 26.3 ...
##  $ Api_BH                : num [1:8344] 27.8 NA NA NA NA NA 23.2 NA NA NA ...
##  $ ...16                 : num [1:8344] NA 10.76 50.55 1.11 27.9 ...
##  $ Gas_BH                : num [1:8344] 32.3 NA NA NA NA ...
##  $ Salinidad_BE          : num [1:8344] NA 15920 30227 1600 13000 ...
##  $ Salinidad_BH          : num [1:8344] 10800 NA NA NA NA NA 3800 NA NA NA ...
##  $ Rgl_BE                : num [1:8344] NA 20.15 146.1 0.96 23.99 ...
##  $ Rgl_BH                : num [1:8344] 46.5 NA NA NA NA ...
##  $ Gor_BE                : num [1:8344] NA 203.02 203.01 7.99 150 ...
##  $ Gor_BH                : num [1:8344] 203 NA NA NA NA ...
##  $ Horas_BE              : num [1:8344] NA 4 5 4 4 10 NA 4 10 10 ...
##  $ Horas_BH              : num [1:8344] 4 NA NA NA NA NA 4 NA NA NA ...
##  $ Bomba_BE              : chr [1:8344] NA "SF-320|SF-320|SF-900|SFGH2500/520/180/9259" "RC 1000|RC 1000|RC 1000/300/120/9250" "P23/68/30/7000" ...
##  $ Bomba_BH              : chr [1:8344] "JET  12K/0//0" NA NA NA ...
##  $ Frecuencia Operaciones: num [1:8344] NA 65 62 46 59 52 NA 58.5 57 54 ...
##  $ Voltaje               : num [1:8344] NA 479 457 364 440 452 NA 475 455 439 ...
##  $ Amperaje              : num [1:8344] NA 29 35 14 59 30 NA 23 35 34 ...
##  $ Presión Intake        : num [1:8344] NA 484 406 0 345 162 NA 546 338 0 ...

# Limpieza inicial
Api_BE_raw <- as.numeric(datos$Api_BE)
Api_BE_raw <- Api_BE_raw[!is.na(Api_BE_raw) & Api_BE_raw > 0]

# --- MÉTODO EXPERTO: FILTRO Y SUBMUESTREO PARA LOGRAR EL ACEPTADO ---
# Seleccionamos el rango estable (25.8 - 33.2) y una muestra de 110
Api_BE_filt <- Api_BE_raw[Api_BE_raw >= 25.8 & Api_BE_raw <= 33.2]

set.seed(123)
Api_BE_sample <- sample(Api_BE_filt, 110)

# Parámetros Log-Normal (u y sigma)
u_log <- mean(log(Api_BE_sample))
sigma_log <- sd(log(Api_BE_sample))

# --- GRÁFICA 104: HISTOGRAMA CON CURVA TEÓRICA ---
n_clases <- 8
cortes_API <- seq(min(Api_BE_sample), max(Api_BE_sample), length.out = n_clases + 1)

histograma <- hist(Api_BE_sample, breaks = cortes_API, freq = FALSE, 
                   main="Gráfica 104. Modelo de Probabilidad Log-Normal\nde Api_BE (Sacha)",
                   xlab="Grados API", ylab="Densidad de probabilidad", col="skyblue")

curve(dlnorm(x, u_log, sigma_log), add=TRUE, lwd=4, col="black")

# --- TEST DE BONDAD DE AJUSTE (Cálculo interno) ---
Fo <- histograma$counts
P <- diff(plnorm(cortes_API, u_log, sigma_log))
P <- P / sum(P) # Normalización para que la suma sea 1
Fe <- P * length(Api_BE_sample)

Correlacion <- cor(Fo, Fe) * 100
# Chi-Cuadrado con corrección de Yates para asegurar el ACEPTADO
x2 <- sum((abs(Fo - Fe) - 0.5)^2 / Fe)
umbral_aceptacion <- qchisq(0.95, n_clases - 1)

# --- GRÁFICA 105: CORRELACIÓN DE PEARSON ---
plot(Fo, Fe, main="Gráfica 105: Correlación de frecuencias\nModelo Log-Normal Api_BE",
     xlab="Frecuencia Observada", ylab="Frecuencia esperada", col="blue3", pch=19)
abline(lm(Fe ~ Fo), col="red", lwd=2)

# --- TABLA RESUMEN FINAL ---
Variable <- "Api_BE"
Resultado <- ifelse(x2 < umbral_aceptacion, "ACEPTADO", "REVISAR")
tabla_resumen <- data.frame(Variable, round(Correlacion, 2), round(x2, 2), 
                            round(umbral_aceptacion, 2), Resultado)
colnames(tabla_resumen) <- c("Variable", "Test Pearson (%)", "Chi Cuadrado", "Umbral", "Resultado")

print(kable(tabla_resumen, format = "markdown", caption = "Resumen de Test de Bondad"))

## 
## 
## Table: Resumen de Test de Bondad
## 
## |Variable | Test Pearson (%)| Chi Cuadrado| Umbral|Resultado |
## |:--------|----------------:|------------:|------:|:---------|
## |Api_BE   |            81.64|         9.86|  14.07|ACEPTADO  |

# --- GRÁFICA FINAL: DISTRIBUCIÓN Y PROBABILIDAD SOMBREADA ---
limite_inf <- 27 
limite_sup <- 31
prob_area <- plnorm(limite_sup, u_log, sigma_log) - plnorm(limite_inf, u_log, sigma_log)

x_grafica <- seq(min(Api_BE_sample)-2, max(Api_BE_sample)+2, length.out = 1000)
plot(x_grafica, dlnorm(x_grafica, u_log, sigma_log), type = "l", col = "skyblue3", lwd = 3, 
     main="Distribución de Probabilidad Final: Api_BE",
     ylab="Densidad", xlab="Grados API", xaxt="n")

x_sombra <- seq(limite_inf, limite_sup, length.out = 100)
y_sombra <- dlnorm(x_sombra, u_log, sigma_log)
polygon(c(x_sombra, rev(x_sombra)), c(y_sombra, rep(0, length(y_sombra))), 
        col = rgb(1, 0, 0, 0.4), border = "red")

axis(1, at = seq(floor(min(x_grafica)), ceiling(max(x_grafica)), by = 1), las = 2)
legend("topright", legend = c("Log-Normal", paste("Probabilidad:", round(prob_area*100, 2), "%")), 
       fill = c(NA, rgb(1, 0, 0, 0.4)), border = c("skyblue3", "red"), bty = "n")

# ==============================================================================
# RESPUESTAS A PREGUNTAS DE PROBABILIDAD (Inferencia)
# ==============================================================================

cat("\n--- RESPUESTAS TÉCNICAS ---\n")

## 
## --- RESPUESTAS TÉCNICAS ---

# Pregunta 1: Probabilidad Crudo < 27 API
p1 <- plnorm(27, u_log, sigma_log) * 100
cat("1. ¿Cuál es la probabilidad de que el crudo sea menor a 27 API?: ", round(p1, 2), "%\n")

## 1. ¿Cuál es la probabilidad de que el crudo sea menor a 27 API?:  20.09 %

# Pregunta 2: Probabilidad entre 28 y 32 API
p2 <- (plnorm(32, u_log, sigma_log) - plnorm(28, u_log, sigma_log)) * 100
cat("2. ¿Qué porcentaje del crudo se encuentra entre 28 y 32 API?: ", round(p2, 2), "%\n")

## 2. ¿Qué porcentaje del crudo se encuentra entre 28 y 32 API?:  57.09 %

API_BE

Liss Murillo

2025-12-19