1 Configuración y Carga de Datos

library(readxl)
library(dplyr)
library(gt)
library(e1071)

Datos    <- read_excel(file.choose())
Variable <- na.omit(Datos$Longitude)
N        <- length(Variable)
str(Datos)
## tibble [49,212 × 32] (S3: tbl_df/tbl/data.frame)
##  $ Unit ID                           : chr [1:49212] "OG0000001" "OG0000002" "OG0000006" "OG0000007" ...
##  $ Unit Name                         : chr [1:49212] "Matzen" "Abalone" "Aguilhada" "Agulha" ...
##  $ Unit name local script            : chr [1:49212] NA "Abalone" "Aguilhada" "Agulha" ...
##  $ Fuel type                         : chr [1:49212] "oil and gas" "oil and gas" "oil and gas" "oil and gas" ...
##  $ Unit type                         : chr [1:49212] "field" "field" "field" "field" ...
##  $ Country                           : chr [1:49212] "Austria" "Brazil" "Brazil" "Brazil" ...
##  $ Subnational unit (province, state): chr [1:49212] NA "Espírito Santo" "Sergipe" "Rio Grande do Norte" ...
##  $ Latitude                          : num [1:49212] 48.4 -21.4 -10.7 -4.9 -22.1 ...
##  $ Longitude                         : num [1:49212] 16.7 -39.6 -36.9 -36.3 -40 ...
##  $ Location accuracy                 : chr [1:49212] "approximate" "exact" "exact" "exact" ...
##  $ Status                            : chr [1:49212] "operating" "operating" "operating" "operating" ...
##  $ Status year                       : num [1:49212] 2023 2022 2022 2022 2022 ...
##  $ Discovery year                    : num [1:49212] 1949 2001 1966 1975 1984 ...
##  $ FID Year                          : chr [1:49212] NA NA NA NA ...
##  $ Production start year             : chr [1:49212] "1951" "2009" "1969" "1979" ...
##  $ Operator                          : chr [1:49212] "OMV" "Shell Brasil Petróleo Ltda." NA NA ...
##  $ Owner                             : chr [1:49212] "OMV (100%)" "Shell Brasil (50%);ONGC Campos (27%);Qatarenergy (23%)" "Petrobras (100%)" "Petrobras (100%)" ...
##  $ Parent                            : chr [1:49212] "OMV Aktiengesellschaft (100%)" "Shell plc (50%);Oil and Natural Gas Corporation (ONGC) (27%)" "Petróleo Brasileiro S.A. (100%)" "Petróleo Brasileiro S.A. (100%)" ...
##  $ Basin                             : chr [1:49212] NA NA NA NA ...
##  $ Concession / block                : chr [1:49212] NA NA NA NA ...
##  $ Project or complex                : chr [1:49212] "Matzen" NA NA NA ...
##  $ Government unit ID                : chr [1:49212] NA NA NA NA ...
##  $ Wiki URL                          : chr [1:49212] "https://www.gem.wiki/Matzen_Oil_and_Gas_Field_(Austria)" "https://www.gem.wiki/Abalone_Oil_and_Gas_Field_%28Esp%C3%ADrito_Santo%2C_Brazil%29" "https://www.gem.wiki/Aguilhada_Oil_and_Gas_Field_%28Sergipe%2C_Brazil%29" "https://www.gem.wiki/Agulha_Oil_and_Gas_Field_%28Rio_Grande_do_Norte%2C_Brazil%29" ...
##  $ Unit name2                        : logi [1:49212] NA NA NA NA NA NA ...
##  $ Production/reserves               : logi [1:49212] NA NA NA NA NA NA ...
##  $ Fuel description                  : logi [1:49212] NA NA NA NA NA NA ...
##  $ Reserves classification (original): logi [1:49212] NA NA NA NA NA NA ...
##  $ Quantity (original)               : logi [1:49212] NA NA NA NA NA NA ...
##  $ Units (original)                  : logi [1:49212] NA NA NA NA NA NA ...
##  $ Data year                         : logi [1:49212] NA NA NA NA NA NA ...
##  $ Quantity (converted)              : logi [1:49212] NA NA NA NA NA NA ...
##  $ Units (converted)                 : logi [1:49212] NA NA NA NA NA NA ...

2 Cálculo de Intervalos y Frecuencias

# ── LÍMITES DECIMALES ──────────────────────────────────────────
min_dec      <- min(Variable)
max_dec      <- max(Variable)
k_dec        <- floor(1 + 3.322 * log10(N))
rango_dec    <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec

# Cortes exactos
cortes_dec                     <- seq(min_dec, max_dec, length.out = k_dec + 1)
cortes_dec[length(cortes_dec)] <- max_dec + 0.0001

# Frecuencias
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec    <- as.vector(table(inter_dec))

# Cálculos
hi_dec      <- (ni_dec / N) * 100
Ni_asc_dec  <- cumsum(ni_dec)
Hi_asc_dec  <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Dataframe Decimal
TDF_Decimal <- data.frame(
  Li      = cortes_dec[1:k_dec],
  Ls      = cortes_dec[2:(k_dec+1)],
  MC      = (cortes_dec[1:k_dec] + cortes_dec[2:(k_dec+1)]) / 2,
  ni      = ni_dec,
  hi      = hi_dec,
  Ni_asc  = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc  = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)

# ── LÍMITES ENTEROS ────────────────────────────────────────────
BASE         <- 10
min_int      <- floor(min(Variable) / BASE) * BASE
max_int      <- ceiling(max(Variable) / BASE) * BASE
k_int_sug    <- floor(1 + 3.322 * log10(N))
Rango_int    <- max_int - min_int
Amplitud_int <- ceiling((Rango_int / k_int_sug) / 10) * 10
if (Amplitud_int == 0) Amplitud_int <- 10

cortes_int <- seq(from = min_int, by = Amplitud_int, length.out = k_int_sug + 1)
if (max(cortes_int) < max(Variable)) {
  cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}
while (length(cortes_int) > 2 && cortes_int[length(cortes_int)-1] >= max(Variable)) {
  cortes_int <- cortes_int[-length(cortes_int)]
}

K_real      <- length(cortes_int) - 1
lim_inf_int <- cortes_int[1:K_real]
lim_sup_int <- cortes_int[2:(K_real+1)]

inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int    <- as.vector(table(inter_int))

hi_int      <- (ni_int / N) * 100
Ni_asc_int  <- cumsum(ni_int)
Hi_asc_int  <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

TDF_Enteros <- data.frame(
  Li      = lim_inf_int,
  Ls      = lim_sup_int,
  MC      = (lim_inf_int + lim_sup_int) / 2,
  ni      = ni_int,
  hi      = hi_int,
  Ni_asc  = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc  = Hi_asc_int,
  Hi_desc = Hi_desc_int
)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

TDF_Dec_Final <- data.frame(
  Li      = as.character(round(TDF_Decimal$Li, 2)),
  Ls      = as.character(round(TDF_Decimal$Ls, 2)),
  MC      = as.character(round(TDF_Decimal$MC, 2)),
  ni      = as.character(TDF_Decimal$ni),
  hi      = as.character(round(TDF_Decimal$hi, 2)),
  Ni_asc  = as.character(TDF_Decimal$Ni_asc),
  Ni_desc = as.character(TDF_Decimal$Ni_desc),
  Hi_asc  = as.character(round(TDF_Decimal$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Decimal$Hi_desc, 2))
)

totales_dec   <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni),
                   round(sum(TDF_Decimal$hi), 2), "-", "-", "-", "-")
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

TDF_Dec_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Longitud (°) de las Unidades Petroleras**")) %>%
  tab_source_note(source_note = "Autor: Tu Nombre") %>%
  cols_label(
    Li      = "Lim. Inf",
    Ls      = "Lim. Sup",
    MC      = "Marca Clase",
    ni      = "Frec. Abs (ni)",
    hi      = "Frec. Rel (%)",
    Ni_asc  = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc  = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14),
              column_labels.background.color = "#F0F0F0")
Tabla N°1 de Distribución de Frecuencias de Longitud (°) de las Unidades Petroleras
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-152.13 -127.01 -139.57 37 0.49 37 7537 0.49 100
-127.01 -101.9 -114.46 2327 30.87 2364 7500 31.37 99.51
-101.9 -76.79 -89.34 1991 26.42 4355 5173 57.78 68.63
-76.79 -51.67 -64.23 719 9.54 5074 3182 67.32 42.22
-51.67 -26.56 -39.11 105 1.39 5179 2463 68.71 32.68
-26.56 -1.44 -14 66 0.88 5245 2358 69.59 31.29
-1.44 23.67 11.12 1157 15.35 6402 2292 84.94 30.41
23.67 48.79 36.23 306 4.06 6708 1135 89 15.06
48.79 73.9 61.35 354 4.7 7062 829 93.7 11
73.9 99.02 86.46 148 1.96 7210 475 95.66 6.3
99.02 124.13 111.57 275 3.65 7485 327 99.31 4.34
124.13 149.25 136.69 41 0.54 7526 52 99.85 0.69
149.25 174.36 161.8 11 0.15 7537 11 100 0.15
TOTAL - - 7537 100 - - - -
Autor: Tu Nombre

3.2 Tabla con Límites Enteros

TDF_Int_Final <- data.frame(
  Li      = as.character(TDF_Enteros$Li),
  Ls      = as.character(TDF_Enteros$Ls),
  MC      = as.character(TDF_Enteros$MC),
  ni      = as.character(TDF_Enteros$ni),
  hi      = as.character(round(TDF_Enteros$hi, 2)),
  Ni_asc  = as.character(TDF_Enteros$Ni_asc),
  Ni_desc = as.character(TDF_Enteros$Ni_desc),
  Hi_asc  = as.character(round(TDF_Enteros$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Enteros$Hi_desc, 2))
)

totales_int   <- c("TOTAL", "-", "-", sum(TDF_Enteros$ni),
                   round(sum(TDF_Enteros$hi), 2), "-", "-", "-", "-")
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

TDF_Int_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°2 de Distribución de Frecuencias de Longitud (°) de las Unidades Petroleras**")) %>%
  tab_source_note(source_note = "Autor: Tu Nombre") %>%
  cols_label(
    Li      = "Lim. Inf",
    Ls      = "Lim. Sup",
    MC      = "Marca Clase",
    ni      = "Frec. Abs (ni)",
    hi      = "Frec. Rel (%)",
    Ni_asc  = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc  = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14),
              column_labels.background.color = "#F0F0F0")
Tabla N°2 de Distribución de Frecuencias de Longitud (°) de las Unidades Petroleras
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-160 -130 -145 37 0.49 37 7537 0.49 100
-130 -100 -115 2689 35.68 2726 7500 36.17 99.51
-100 -70 -85 2018 26.77 4744 4811 62.94 63.83
-70 -40 -55 403 5.35 5147 2793 68.29 37.06
-40 -10 -25 51 0.68 5198 2390 68.97 31.71
-10 20 5 1130 14.99 6328 2339 83.96 31.03
20 50 35 431 5.72 6759 1209 89.68 16.04
50 80 65 384 5.09 7143 778 94.77 10.32
80 110 95 183 2.43 7326 394 97.2 5.23
110 140 125 178 2.36 7504 211 99.56 2.8
140 170 155 26 0.34 7530 33 99.91 0.44
170 200 185 7 0.09 7537 7 100 0.09
TOTAL - - 7537 100 - - - -
Autor: Tu Nombre

4 Análisis Gráfico

4.1 Histogramas de Cantidad

par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni,
        names.arg = TDF_Enteros$MC,
        main      = "",
        xlab      = "",
        ylab      = "",
        col       = "#FF6961",
        ylim      = c(0, max(TDF_Enteros$ni) * 1.2),
        space     = 0,
        las       = 2,
        cex.names = 0.7)
mtext("Cantidad",         side = 2, line = 4.5, cex = 1,   font = 1)
mtext("Longitud (\u00b0)", side = 1, line = 4,   cex = 0.9)
mtext("Gráfica N\u00b01: Distribución de Cantidad de Unidades Petroleras por Longitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)

par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni,
        names.arg = TDF_Enteros$MC,
        main      = "",
        xlab      = "",
        ylab      = "",
        col       = "#FF6961",
        ylim      = c(0, max(TDF_Enteros$ni) * 1.2),
        space     = 0,
        las       = 2,
        cex.names = 0.7)
mtext("Cantidad",          side = 2, line = 4.5, cex = 1,   font = 1)
mtext("Longitud (\u00b0)", side = 1, line = 4,   cex = 0.9)
mtext("Gráfica N\u00b02: Distribución de Cantidad de Unidades Petroleras por Longitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)

4.2 Histogramas Porcentuales

par(mar = c(8, 5, 5, 2))
bp3 <- barplot(TDF_Enteros$hi,
               main      = "",
               xlab      = "",
               ylab      = "Porcentaje (%)",
               col       = "#FF6961",
               ylim      = c(0, max(TDF_Enteros$hi) * 1.3),
               space     = 0,
               names.arg = TDF_Enteros$MC,
               cex.names = 0.7,
               las       = 2)
mtext("Longitud (\u00b0)", side = 1, line = 4, cex = 0.9)
mtext("Gráfica N\u00b03: Distribución Porcentual de las Unidades Petroleras por Longitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)
text(x      = bp3,
     y      = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos    = 3,
     cex    = 0.6,
     col    = "black")

par(mar = c(8, 5, 5, 2))
bp4 <- barplot(TDF_Enteros$hi,
               main      = "",
               xlab      = "",
               ylab      = "Porcentaje (%)",
               col       = "#FF6961",
               space     = 0,
               names.arg = TDF_Enteros$MC,
               cex.names = 0.7,
               las       = 2,
               ylim      = c(0, 100))
mtext("Longitud (\u00b0)", side = 1, line = 4, cex = 0.9)
mtext("Gráfica N\u00b04: Distribución Porcentual de las Unidades Petroleras por Longitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)
text(x      = bp4,
     y      = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos    = 3,
     cex    = 0.6,
     col    = "black")

4.3 Diagrama de Cajas (Boxplot)

par(mar = c(5, 5, 4, 2))
boxplot(Variable,
        horizontal = TRUE,
        col        = "#FF6961",
        xlab       = "Longitud (\u00b0)",
        cex.main   = 0.9,
        main       = "Gráfica N\u00b05: Distribución de la Longitud en las Unidades Petroleras")

4.4 Ojivas

par(mar = c(5, 5, 7, 10), xpd = TRUE)

x_asc  <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc  <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Longitud (\u00b0)",
     ylab = "Frecuencia Acumulada",
     col  = "black",
     pch  = 19,
     xlim = c(min(TDF_Enteros$Li), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)))

lines(x_desc, y_desc, col = "red", type = "b", pch = 19)

grid()
mtext("Gráfica N\u00b06: Ojivas Ascendentes y Descendentes de la\nDistribución de la Longitud en las Unidades Petroleras",
      side = 3, line = 3, adj = 0.5, cex = 0.9, font = 2)
legend("right",
       inset  = c(-0.2, 0),
       legend = c("Ascendente", "Descendente"),
       col    = c("black", "red"),
       lty    = 1,
       pch    = 1,
       cex    = 0.6,
       bty    = "n")


5 Indicadores Estadísticos

## TENDENCIA CENTRAL
media   <- round(mean(Variable), 2)
mediana <- round(median(Variable), 2)

max_frecuencia <- max(TDF_Enteros$ni)
moda_vals      <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt       <- paste(round(moda_vals, 2), collapse = ", ")

## DISPERSIÓN
varianza <- var(Variable)
sd_val   <- sd(Variable)
cv       <- round((sd_val / abs(media)) * 100, 2)

## FORMA
asimetria <- skewness(Variable, type = 2)
curtosis  <- kurtosis(Variable)

## OUTLIERS
Q1          <- quantile(Variable, 0.25)
Q3          <- quantile(Variable, 0.75)
IQR_val     <- Q3 - Q1
lim_inf_out <- Q1 - 1.5 * IQR_val
lim_sup_out <- Q3 + 1.5 * IQR_val

outliers_data <- Variable[Variable < lim_inf_out | Variable > lim_sup_out]
num_outliers  <- length(outliers_data)

if (num_outliers > 0) {
  rango_outliers <- paste0(num_outliers, " [",
                           round(min(outliers_data), 2), "; ",
                           round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

tabla_indicadores <- data.frame(
  Variable     = "Longitud (\u00b0)",
  Rango_MinMax = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
  X            = c(media),
  Me           = c(mediana),
  Mo           = c(moda_txt),
  V            = c(varianza),
  Sd           = c(sd_val),
  Cv           = c(cv),
  As           = c(asimetria),
  K            = c(curtosis),
  Outliers     = rango_outliers
)

tabla_indicadores %>%
  gt() %>%
  tab_header(title = md("**Tabla N°3 de Indicadores Estadísticos de Longitud (°) de las Unidades Petroleras**")) %>%
  tab_source_note(source_note = "Autor: Tu Nombre") %>%
  cols_label(
    Variable     = "Variable",
    Rango_MinMax = "Rango",
    X            = "Media (X)",
    Me           = "Mediana (Me)",
    Mo           = "Moda (Mo)",
    V            = "Varianza (V)",
    Sd           = "Desv. Est. (Sd)",
    Cv           = "C.V. (%)",
    As           = "Asimetría (As)",
    K            = "Curtosis (K)",
    Outliers     = "Outliers [Intervalo]"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(
    heading.title.font.size        = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°3 de Indicadores Estadísticos de Longitud (°) de las Unidades Petroleras
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Longitud (°) [-152.13; 174.36] -54.65 -93.4 -115 4610.381 67.89978 124.24 1.072274 -0.05216078 7 [173.31; 174.36]
Autor: Tu Nombre