##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: LEONARDO RUIZ ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


##1. Carga de Datos
library(readxl)
# Read the Excel workbook at the hard-coded path into `datos`.
datos <- read_excel(
  path = "C:/Users/LEO/Documents/Producción Campo Sacha.csv.xlsx"
)
# Print a compact overview of the columns and their types.
str(object = datos)
## tibble [8,344 × 31] (S3: tbl_df/tbl/data.frame)
##  $ mes                   : chr [1:8344] "Ene" "Ene" "Ene" "Ene" ...
##  $ día                   : num [1:8344] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Pozo                  : chr [1:8344] "SACHA-001A" "SACHA-019A" "SACHA-052B" "SACHA-083A" ...
##  $ Campo                 : chr [1:8344] "SACHA" "SACHA" "SACHA" "SACHA" ...
##  $ Reservorio            : chr [1:8344] "U" "U" "U INFERIOR" "HOLLIN INFERIOR" ...
##  $ Bpd                   : num [1:8344] NA 53 249 139 186 136 NA 456 161 164 ...
##  $ Bppd_BH               : num [1:8344] 159 NA NA NA NA NA 155 NA NA NA ...
##  $ Bfpd_BE               : num [1:8344] NA 534 346 1158 1163 ...
##  $ Bfpd_BH               : num [1:8344] 695 NA NA NA NA NA 441 NA NA NA ...
##  $ Bapd_BE               : num [1:8344] NA 481 97 1019 977 ...
##  $ Bapd_BH               : num [1:8344] 536 NA NA NA NA NA 286 NA NA NA ...
##  $ Bsw_BE                : num [1:8344] NA 90.1 28 88 84 ...
##  $ Bsw_BH                : num [1:8344] 77.1 NA NA NA NA ...
##  $ Api_BE                : num [1:8344] NA 26.7 27.8 27.7 24 20.5 NA 28.5 29.9 26.3 ...
##  $ Api_BH                : num [1:8344] 27.8 NA NA NA NA NA 23.2 NA NA NA ...
##  $ Gas_BE                : num [1:8344] NA 10.76 50.55 1.11 27.9 ...
##  $ Gas_BH                : num [1:8344] 32.3 NA NA NA NA ...
##  $ Salinidad_BE          : num [1:8344] NA 15920 30227 1600 13000 ...
##  $ Salinidad_BH          : num [1:8344] 10800 NA NA NA NA NA 3800 NA NA NA ...
##  $ Rgl_BE                : num [1:8344] NA 20.15 146.1 0.96 23.99 ...
##  $ Rgl_BH                : num [1:8344] 46.5 NA NA NA NA ...
##  $ Gor_BE                : num [1:8344] NA 203.02 203.01 7.99 150 ...
##  $ Gor_BH                : num [1:8344] 203 NA NA NA NA ...
##  $ Horas_BE              : num [1:8344] NA 4 5 4 4 10 NA 4 10 10 ...
##  $ Horas_BH              : num [1:8344] 4 NA NA NA NA NA 4 NA NA NA ...
##  $ Bomba_BE              : chr [1:8344] NA "SF-320|SF-320|SF-900|SFGH2500/520/180/9259" "RC 1000|RC 1000|RC 1000/300/120/9250" "P23/68/30/7000" ...
##  $ Bomba_BH              : chr [1:8344] "JET  12K/0//0" NA NA NA ...
##  $ Frecuencia Operaciones: num [1:8344] NA 65 62 46 59 52 NA 58.5 57 54 ...
##  $ Voltaje               : num [1:8344] NA 479 457 364 440 452 NA 475 455 439 ...
##  $ Amperaje              : num [1:8344] NA 29 35 14 59 30 NA 23 35 34 ...
##  $ Presión Intake        : num [1:8344] NA 484 406 0 345 162 NA 546 338 0 ...
##2.Extraer la variable continua 
# Take the Rgl_BE column from `datos`, coerce it to numeric and drop the
# missing values, all in one expression.
Rgl_BE <- na.omit(as.numeric(datos[["Rgl_BE"]]))

##3. Cálculo de intervalos (sturges)
# Spread of the data: largest observation minus the smallest one.
R <- diff(range(Rgl_BE))
# Number of classes: 1 + 3.3 * log10(n), truncated to a whole number.
k <- floor(3.3 * log10(length(Rgl_BE)) + 1)
# Width shared by every class.
A <- R / k

# Lower limits: k values starting at the minimum and spaced A apart.
liminf <- seq(from = min(Rgl_BE), by = A, length.out = k)

# Upper limits: each lower limit plus one width. The last one is overwritten
# with the observed maximum.
limsup <- replace(liminf + A, k, max(Rgl_BE))

# Class marks: midpoint between the lower and upper limit of each class.
MC <- (limsup + liminf) / 2

##4.Tabla de distribución de frecuencias
#4.1 Frecuencia absoluta
# Absolute frequency of each of the k classes.
#
# The classes are described by a single vector of k + 1 break points: the k
# lower limits followed by the upper limit of the last class. Using one shared
# vector means the upper edge of class i and the lower edge of class i + 1 are
# the very same number, so every observation lands in exactly one class (no
# double counting and no gaps caused by floating-point rounding).
#
# findInterval() returns, for each observation, the index of the class
# [break_i, break_{i+1}) it belongs to; rightmost.closed = TRUE makes the last
# class closed on the right so the maximum is counted in class k.
# tabulate() then counts the observations per index, reporting 0 for empty
# classes. as.numeric() keeps `ni` a double vector.
ni <- as.numeric(
  tabulate(
    findInterval(
      as.numeric(Rgl_BE),
      c(liminf, limsup[k]),
      rightmost.closed = TRUE
    ),
    nbins = k
  )
)

#4.2 Frecuencias relativas y acumuladas
# Relative frequency of each class, as a percentage of all observations.
hi <- 100 * (ni / length(Rgl_BE))

# Ascending cumulative totals (first class to last) for counts and percentages.
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)

# Descending cumulative totals: accumulate from the last class backwards, then
# flip the result so it lines up with the class order again.
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))

#4.3 Tabla de frecuencias
# Assemble the frequency table and round every column to two decimals in a
# single step. The count columns hold whole numbers, so rounding leaves them
# unchanged.
tabla_Rgl_BE <- round(
  data.frame(
    Límite_Inferior = liminf,
    Límite_Superior = limsup,
    Marca_Clase = MC,
    ni = ni,
    hi_porc = hi,
    Ni_asc = Niasc,
    Ni_dsc = Nidsc,
    Hiasc_porc = Hiasc,
    Hidsc_porc = Hidsc
  ),
  digits = 2
)

# TABLA 1 CON GT()
library(gt)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(e1071)
# Render the frequency table with gt: title and subtitle, a source note,
# display labels for the columns, two-decimal formatting for the limit and
# class-mark columns, and two decimals plus a trailing "%" for the percentage
# columns.
tabla_Rgl_BE %>%
  gt() %>%
  tab_header(
    title = md("**Tabla 1: Distribución de Frecuencias de Rgl_BE**"),
    subtitle = md("Campo Sacha | Método Sturges")
  ) %>%
  tab_source_note(source_note = md("**Campo Sacha**")) %>%
  cols_label(
    Límite_Inferior = "L. Inferior",
    Límite_Superior = "L. Superior",
    Marca_Clase = "Marca Clase",
    hi_porc = "hi %",
    Ni_asc = "Ni Asc.",
    Ni_dsc = "Ni Desc.",
    Hiasc_porc = "Hi Asc. %",
    Hidsc_porc = "Hi Desc. %"
  ) %>%
  fmt_number(
    columns = c("Límite_Inferior", "Límite_Superior", "Marca_Clase"),
    decimals = 2
  ) %>%
  fmt_number(
    columns = c("hi_porc", "Hiasc_porc", "Hidsc_porc"),
    decimals = 2,
    pattern = "{x}%"
  )
## Tabla 1: Distribución de Frecuencias de Rgl_BE
## Campo Sacha | Método Sturges
## L. Inferior L. Superior Marca Clase ni hi % Ni Asc. Ni Desc. Hi Asc. % Hi Desc. %
## 0.08 116.44 58.26 2997 38.90% 2997 7705 38.90% 100.00%
## 116.44 232.81 174.62 2265 29.40% 5262 4708 68.29% 61.10%
## 232.81 349.17 290.99 978 12.69% 6240 2443 80.99% 31.71%
## 349.17 465.53 407.35 390 5.06% 6630 1465 86.05% 19.01%
## 465.53 581.90 523.71 458 5.94% 7088 1075 91.99% 13.95%
## 581.90 698.26 640.08 279 3.62% 7367 617 95.61% 8.01%
## 698.26 814.62 756.44 272 3.53% 7639 338 99.14% 4.39%
## 814.62 930.98 872.80 44 0.57% 7683 66 99.71% 0.86%
## 930.98 1,047.35 989.17 1 0.01% 7684 22 99.73% 0.29%
## 1,047.35 1,163.71 1,105.53 0 0.00% 7684 21 99.73% 0.27%
## 1,163.71 1,280.07 1,221.89 1 0.01% 7685 21 99.74% 0.27%
## 1,280.07 1,396.44 1,338.26 0 0.00% 7685 20 99.74% 0.26%
## 1,396.44 1,512.80 1,454.62 20 0.26% 7705 20 100.00% 0.26%
## Campo Sacha
##5. Gráficos
#5.1 Histograma
# Histogram drawn on exactly the same classes as the frequency table.
#
# The breaks are the k lower limits plus the upper limit of the last class
# (k + 1 break points, hence k bars and no empty bar beyond the maximum).
# right = FALSE makes each class closed on the left and open on the right,
# matching how the table counts observations; include.lowest = TRUE closes the
# last class on the right so the maximum value is included.
# xaxt = "n" suppresses the automatic x axis so a custom one can be drawn.
hist(Rgl_BE,
     main = "Gráfica No.1: Distribución de Rgl_BE - Campo Sacha",
     breaks = c(liminf, limsup[k]),
     right = FALSE,
     include.lowest = TRUE,
     xlab = "Rgl_BE",
     ylab = "Cantidad",
     col = "lightblue",
     border = "darkblue",
     xaxt = "n")

# Custom x axis: one tick per class, placed at the class mark and labelled
# with its value rounded to two decimals (horizontal labels).
axis(1, at = MC,
     labels = round(MC, 2),
     las = 1)

#5.2 Ojivas
# Ascending ogive: starts at zero on the smallest lower limit, then takes the
# ascending cumulative count at each upper limit.
x_asc <- c(min(liminf), limsup)
y_asc <- c(0, Niasc)

# Descending ogive: takes the descending cumulative count at each lower limit
# and ends at zero on the largest upper limit.
x_desc <- c(liminf, max(limsup))
y_desc <- c(Nidsc, 0)

# Axis extents covering both curves; the y axis always starts at zero.
x_range <- range(x_asc, x_desc)
y_range <- c(0, max(y_asc, y_desc))

# Draw the ascending curve first, with the automatic x axis switched off.
plot(
  x = x_asc, y = y_asc,
  type = "o", pch = 16, lwd = 2, col = "skyblue",
  xlim = x_range, ylim = y_range,
  xaxt = "n",
  main = "Gráfica No.2: Ojivas Ascendente y Descendente de Rgl_BE",
  xlab = "Rgl_BE",
  ylab = "Frecuencia acumulada"
)

# Axes with "pretty" tick positions; x labels are forced out of scientific
# notation.
axis(
  side = 1,
  at = pretty(x_range),
  labels = format(pretty(x_range), scientific = FALSE)
)
axis(side = 2, at = pretty(y_range))

# Overlay the descending curve on the same plot.
lines(
  x = x_desc, y = y_desc,
  type = "o", pch = 17, lwd = 2, col = "steelblue4"
)

# Legend identifying both curves by colour and point symbol.
legend(
  x = "right",
  legend = c("Ojiva Ascendente", "Ojiva Descendente"),
  col = c("skyblue", "steelblue4"),
  pch = c(16, 17),
  lty = 1,
  lwd = 2,
  cex = 0.8
)

#5.3 Diagramas de cajas
# Horizontal box plot of Rgl_BE, drawn without the automatic x axis.
boxplot(
  x = Rgl_BE,
  horizontal = TRUE,
  col = "steelblue",
  xaxt = "n",
  main = "Gráfica No.3: Distribución de Rgl_BE - Campo Sacha",
  xlab = "Rgl_BE"
)

# Custom x axis with "pretty" tick positions, labelled without scientific
# notation.
axis(
  side = 1,
  at = pretty(Rgl_BE),
  labels = format(pretty(Rgl_BE), scientific = FALSE)
)

# Outliers

# Points flagged as outliers by boxplot.stats() (its `out` component).
outliers <- boxplot.stats(Rgl_BE)[["out"]]

# Report how many were found.
cat("\nNúmero de outliers:", length(outliers), "\n")
## 
## Número de outliers: 681

# List the values (rounded to two decimals) only when there is at least one.
if (length(outliers) != 0) {
  cat("Outliers:", round(outliers, 2), "\n")
}
## Outliers: 1511.6 672 672.85 576 672.85 666.67 761.78 791.25 791.25 817.12 892.11 671.93 793.33 671.93 589.19 672.49 608 1512.66 605.16 576.09 783.96 672.27 672.27 658.93 573.68 751.1 696.07 701.05 672.54 666.67 659.06 1510.54 672.27 588.57 588.33 605.28 672.27 588.99 576.12 766.1 793.28 672.27 817.12 892.11 1511.6 695.5 708.93 605.13 783.96 658.93 672.27 747.88 791.25 791.25 589.57 734.48 791.21 791.25 752.08 793.22 589.38 576.06 576.06 608 676.06 1512.8 766.67 817.12 892.11 783.96 589.09 588.03 576 576.06 676.06 766.1 698.98 693.04 589.38 739.78 791.21 791.25 576 672.27 588.04 639.8 605.35 672.54 817.3 889.76 605.35 1512.8 698.98 693.04 589.66 793.33 752.33 640.4 655.38 672.54 817.3 889.76 783.96 666.67 766.1 752.33 751.04 589.29 751.67 783.96 589.66 751.97 768 605.18 588.02 1512 752.56 576 671.07 783.96 696.58 697.45 753.35 589.74 606.19 605.18 752.56 752.08 791.19 791.21 791.25 576.06 676.06 576 783.89 768.33 608.7 656.2 817.22 890.89 606.47 654.49 588.73 768.2 720.71 766.1 791.2 791.21 791.25 793.25 783.78 1510.95 589.57 655.87 589.74 655.84 576.87 752.08 575.82 783.78 696.58 697.45 768.2 588.27 768.5 605.19 1512 666.67 791.04 791.21 791.25 817.12 892.11 588.79 593.16 605.25 666.67 1510.95 768.33 588.79 766.39 817.12 892.11 656.57 605.25 672.54 671.07 606.19 703.24 576 588.79 588.25 793.25 704.07 783.89 768.27 593.16 575.34 672.04 768.33 671.49 575.82 766.1 605.18 575.82 655.56 1512 696.58 697.45 791.18 791.21 791.25 705.04 575.79 1512.66 666.67 588.12 655.84 775.17 895.98 605.33 696.58 697.45 588.99 1512.8 588.37 766.1 751.71 574.5 575.94 587.64 655.81 791.2 791.21 791.25 605.42 1511.6 593.16 575.82 606.19 783.78 771.43 704.48 783.78 767.57 594.08 767.57 575.82 793.33 696.58 697.45 671.49 666.67 588.37 775.17 895.98 605.18 1512.66 783.78 588.99 587.59 656.25 751.35 671.83 791.2 791.21 791.25 576 1511.6 767.49 671.49 764.91 671.83 704.48 575.94 791.19 791.21 791.25 605.4 588.4 656.53 589.38 775.17 895.98 854.24 588.99 608.96 765.81 791.18 791.21 791.25 
752.81 605.33 576 752.41 783.71 1512.8 587.67 656.09 767.57 793.16 703.32 588.12 594.08 575.82 605.19 766.1 775.17 895.98 767.57 587.63 656.09 783.71 665.45 792.67 752.81 671.83 575.94 767.57 587.5 751.11 607.3 704.59 696.58 697.45 655.97 610.37 766.1 767.49 575.36 776.23 605.19 1512.8 671.31 608.33 576 765.81 793.28 605.4 594.1 672 610.69 1510.54 576.19 656.53 672.7 611.02 672.43 793.25 704.29 696.58 697.45 669.09 576.19 656.09 791.19 783.64 1510.54 704.59 783.71 671.35 766.1 768.63 775.17 895.98 672.16 576.19 610.69 791.18 791.21 791.25 793.16 752.41 605.24 656.25 703.11 783.64 768.63 752.81 791.21 791.21 791.18 576.16 610.69 605.19 698.98 693.04 672.09 608.33 576 766.1 609.23 575.94 783.71 671.31 669.09 656.69 753.07 768.55 587.63 594.09 576.22 791.21 791.21 791.18 775.09 892.03 783.45 793.33 696.58 697.45 607.75 752.81 655.25 672.16 704.42 576.22 791.18 791.21 791.25 576 609.23 605.33 587.63 640.32 752.27 655.81 607.75 768.99 672.19 587.63 791.2 791.25 791.25 640 765.81 605.18 696.58 697.45 594.09 575.94 656.25 793.16 703.11 669.09 703.49 765.81 783.45 793.04 791.69 669.09 575.76 791.22 791.25 791.25 606.45 669.09 605.18 576.22 765.81 576 775.09 892.03 587.63 656.09 752.27 672.16 639.88 640 575.85 784.4 588.99 759.95 671.81 576.22 791.2 791.25 791.25 776.2 890.85 640.26 612.83 823.67 751.72 703.58 588.79 698.98 718.7 594.09 574.5 639.88 576.28 771.17 704.52 791.69 594.09 575.85 890.85 589.09 576 775.03 894.49 704.52 822.87 791.73 596.15 596.15 791.21 791.25 791.25 752 784.4 589.19 574.5 595.29 764.91 698.98 718.7 671.81 669.09 793.16 823.67 791.71 577.48 594.31 757.83 771.43 751.72 791.73 670.97 791.73 670.97 766.1 576.24 576.32 823.06 752.68 784.49 594.55 765.81 573.68 720 596.15 596.15 594.09 656.09 575.85 775 895.76 791.69 589.29 791.2 791.25 791.25 751.72 594.31 757.83 793.19 751.72 822.63 588.79 594.34 766.1 775 895.76 594.09 775 903.42 791.67 588.79 735.48 882.48 696.58 697.45 765.81 791.73 594.31 822.63 577.48 793.13 671.69 575.79 671.35 791.22 751.86 
784.49 669.09 588.79 791.71 784.49 823.67 769.47 791.71 670.97 791.2 791.25 791.25 696.58 697.45 670.97 596.15 596.15 594.09 752.52 713.88 594.12 791.73 735.65 882.29 735.7 884.75 594.31 784.49 594.35 823.67 594.37 574.5 656.3 698.98 685 589.09 823.1 661.11 661.11 784 791.75 670.97 669.09 769.47 793.28 594.37 575.79 670.97 752.52 791.73 713.88 816.67 766.1 588.99 1244.56 791.25 573.68 666.67 1041.25 735.7 884.75 791.75 670.97 675.47 784.91 671.95 698.98 693.04 576 587.81 791.75 671.35 661.11 661.11 594.12 594.37 656.3 575.79 793.13 784 587.44 588.79 752.1 671.72 589.09 587.81 752 695.5 701.25 791.79 577.48 766.1 594.37 672.09 588.19 575.7
##6. Indicadores estadísticos 
# Describe the modal class (the class with the highest frequency) as text.
#
# Arguments:
#   freq  - numeric vector of class frequencies; defaults to the script's `ni`.
#   lower - numeric vector of lower class limits; defaults to `liminf`.
#   upper - numeric vector of upper class limits; defaults to `limsup`.
#
# Returns:
#   A single string of the form "[lower, upper]" with both limits rounded to
#   two decimals. When several classes tie for the highest frequency, the
#   first one is reported (which.max() returns the first maximum).
#
# The defaults keep the zero-argument call get_mode_interval() working exactly
# as before, while the parameters allow the helper to be reused for any other
# frequency table.
get_mode_interval <- function(freq = ni, lower = liminf, upper = limsup) {
  # All three vectors must describe the same, non-empty set of classes.
  stopifnot(
    length(freq) > 0,
    length(freq) == length(lower),
    length(lower) == length(upper)
  )
  idx <- which.max(freq)
  paste0("[", round(lower[idx], 2), ", ", round(upper[idx], 2), "]")
}

# Central tendency: mean, median and the modal class as text.
media <- mean(Rgl_BE)
mediana <- median(Rgl_BE)
moda_intervalo <- get_mode_interval()

# Dispersion: variance, standard deviation and the coefficient of variation
# (standard deviation relative to the mean, as a percentage).
varianza <- var(Rgl_BE)
desv <- sd(Rgl_BE)
cv <- 100 * (desv / media)

# Shape: skewness and kurtosis. These functions are not part of base R;
# presumably they come from the e1071 package loaded earlier in the script.
asim <- skewness(Rgl_BE)
curt <- kurtosis(Rgl_BE)

# CREAR DATA.FRAME DE INDICADORES (¡ESTA PARTE FALTABA!)
# Two-column summary table: indicator name and its value.
# The numeric indicators are rounded to two decimals in two grouped calls.
# Because the modal interval is text, c() turns the whole `Valor` column into
# character strings.
indicadores <- data.frame(
  Indicador = c(
    "Mínimo", "Máximo", "Media", "Mediana", "Moda (intervalo)",
    "Desviación Estándar", "Varianza", "Coef. Variación (%)",
    "Asimetría", "Curtosis", "N° Outliers"
  ),
  Valor = c(
    round(c(min(Rgl_BE), max(Rgl_BE), media, mediana), 2),
    moda_intervalo,
    round(c(desv, varianza, cv, asim, curt), 2),
    length(outliers)
  )
)

# TABLA 2 CON GT()
# Render the indicator table with gt: a title, a source note, explicit column
# labels, and bold text for the indicator names in the table body.
indicadores %>%
  gt() %>%
  tab_header(title = md("**Tabla 2: Indicadores Estadísticos de Rgl_BE**")) %>%
  tab_source_note(source_note = md("**Campo Sacha**")) %>%
  cols_label(Indicador = "Indicador", Valor = "Valor") %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(columns = "Indicador")
  )
## Tabla 2: Indicadores Estadísticos de Rgl_BE
## Indicador Valor
## Mínimo 0.08
## Máximo 1512.8
## Media 214.76
## Mediana 150.03
## Moda (intervalo) [0.08, 116.44]
## Desviación Estándar 211.06
## Varianza 44545.44
## Coef. Variación (%) 98.28
## Asimetría 1.76
## Curtosis 4.15
## N° Outliers 681
## Campo Sacha
##7. Conclusión 
# La variable Rgl_BE fluctúa entre 0.08 y 1512.8 y sus valores se sitúan en torno a 150.03 (mediana; media = 214.76), con una desviación estándar de 211.06, por lo que constituye un conjunto de valores extremadamente heterogéneo (CV = 98.28%). Los valores se concentran en el intervalo modal [0.08, 116.44], a excepción de los 681 valores atípicos identificados; la distribución es leptocúrtica (K = 4.15) y presenta un sesgo pronunciado hacia la derecha (As = 1.76). Por lo tanto, el comportamiento de la variable indica un proceso mayoritariamente inestable, con mediciones consistentes en el rango principal, aunque con una presencia significativa de lecturas extremas que requieren un análisis particular.