1 Configuración y Carga de Datos

library(readxl)
library(dplyr)
library(gt)
library(e1071)

Datos    <- read_excel(file.choose())
Variable <- na.omit(Datos$Latitude)
N        <- length(Variable)
str(Datos)
## tibble [49,212 × 32] (S3: tbl_df/tbl/data.frame)
##  $ Unit ID                           : chr [1:49212] "OG0000001" "OG0000002" "OG0000006" "OG0000007" ...
##  $ Unit Name                         : chr [1:49212] "Matzen" "Abalone" "Aguilhada" "Agulha" ...
##  $ Unit name local script            : chr [1:49212] NA "Abalone" "Aguilhada" "Agulha" ...
##  $ Fuel type                         : chr [1:49212] "oil and gas" "oil and gas" "oil and gas" "oil and gas" ...
##  $ Unit type                         : chr [1:49212] "field" "field" "field" "field" ...
##  $ Country                           : chr [1:49212] "Austria" "Brazil" "Brazil" "Brazil" ...
##  $ Subnational unit (province, state): chr [1:49212] NA "Espírito Santo" "Sergipe" "Rio Grande do Norte" ...
##  $ Latitude                          : num [1:49212] 48.4 -21.4 -10.7 -4.9 -22.1 ...
##  $ Longitude                         : num [1:49212] 16.7 -39.6 -36.9 -36.3 -40 ...
##  $ Location accuracy                 : chr [1:49212] "approximate" "exact" "exact" "exact" ...
##  $ Status                            : chr [1:49212] "operating" "operating" "operating" "operating" ...
##  $ Status year                       : num [1:49212] 2023 2022 2022 2022 2022 ...
##  $ Discovery year                    : num [1:49212] 1949 2001 1966 1975 1984 ...
##  $ FID Year                          : chr [1:49212] NA NA NA NA ...
##  $ Production start year             : chr [1:49212] "1951" "2009" "1969" "1979" ...
##  $ Operator                          : chr [1:49212] "OMV" "Shell Brasil Petróleo Ltda." NA NA ...
##  $ Owner                             : chr [1:49212] "OMV (100%)" "Shell Brasil (50%);ONGC Campos (27%);Qatarenergy (23%)" "Petrobras (100%)" "Petrobras (100%)" ...
##  $ Parent                            : chr [1:49212] "OMV Aktiengesellschaft (100%)" "Shell plc (50%);Oil and Natural Gas Corporation (ONGC) (27%)" "Petróleo Brasileiro S.A. (100%)" "Petróleo Brasileiro S.A. (100%)" ...
##  $ Basin                             : chr [1:49212] NA NA NA NA ...
##  $ Concession / block                : chr [1:49212] NA NA NA NA ...
##  $ Project or complex                : chr [1:49212] "Matzen" NA NA NA ...
##  $ Government unit ID                : chr [1:49212] NA NA NA NA ...
##  $ Wiki URL                          : chr [1:49212] "https://www.gem.wiki/Matzen_Oil_and_Gas_Field_(Austria)" "https://www.gem.wiki/Abalone_Oil_and_Gas_Field_%28Esp%C3%ADrito_Santo%2C_Brazil%29" "https://www.gem.wiki/Aguilhada_Oil_and_Gas_Field_%28Sergipe%2C_Brazil%29" "https://www.gem.wiki/Agulha_Oil_and_Gas_Field_%28Rio_Grande_do_Norte%2C_Brazil%29" ...
##  $ Unit name2                        : logi [1:49212] NA NA NA NA NA NA ...
##  $ Production/reserves               : logi [1:49212] NA NA NA NA NA NA ...
##  $ Fuel description                  : logi [1:49212] NA NA NA NA NA NA ...
##  $ Reserves classification (original): logi [1:49212] NA NA NA NA NA NA ...
##  $ Quantity (original)               : logi [1:49212] NA NA NA NA NA NA ...
##  $ Units (original)                  : logi [1:49212] NA NA NA NA NA NA ...
##  $ Data year                         : logi [1:49212] NA NA NA NA NA NA ...
##  $ Quantity (converted)              : logi [1:49212] NA NA NA NA NA NA ...
##  $ Units (converted)                 : logi [1:49212] NA NA NA NA NA NA ...

2 Cálculo de Intervalos y Frecuencias

# ── LÍMITES DECIMALES ──────────────────────────────────────────
min_dec      <- min(Variable)
max_dec      <- max(Variable)
k_dec        <- floor(1 + 3.322 * log10(N))
rango_dec    <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec

# Cortes exactos
cortes_dec                    <- seq(min_dec, max_dec, length.out = k_dec + 1)
cortes_dec[length(cortes_dec)] <- max_dec + 0.0001

# Frecuencias
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec    <- as.vector(table(inter_dec))

# Cálculos
hi_dec      <- (ni_dec / N) * 100
Ni_asc_dec  <- cumsum(ni_dec)
Hi_asc_dec  <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Dataframe Decimal
TDF_Decimal <- data.frame(
  Li      = cortes_dec[1:k_dec],
  Ls      = cortes_dec[2:(k_dec+1)],
  MC      = (cortes_dec[1:k_dec] + cortes_dec[2:(k_dec+1)]) / 2,
  ni      = ni_dec,
  hi      = hi_dec,
  Ni_asc  = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc  = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)

# ── LÍMITES ENTEROS ────────────────────────────────────────────
BASE         <- 10
min_int      <- floor(min(Variable) / BASE) * BASE
max_int      <- ceiling(max(Variable) / BASE) * BASE
k_int_sug    <- floor(1 + 3.322 * log10(N))
Rango_int    <- max_int - min_int
Amplitud_int <- ceiling((Rango_int / k_int_sug) / 10) * 10
if (Amplitud_int == 0) Amplitud_int <- 10

cortes_int <- seq(from = min_int, by = Amplitud_int, length.out = k_int_sug + 1)
if (max(cortes_int) < max(Variable)) {
  cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}
while (length(cortes_int) > 2 && cortes_int[length(cortes_int)-1] >= max(Variable)) {
  cortes_int <- cortes_int[-length(cortes_int)]
}

K_real      <- length(cortes_int) - 1
lim_inf_int <- cortes_int[1:K_real]
lim_sup_int <- cortes_int[2:(K_real+1)]

inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int    <- as.vector(table(inter_int))

hi_int      <- (ni_int / N) * 100
Ni_asc_int  <- cumsum(ni_int)
Hi_asc_int  <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

TDF_Enteros <- data.frame(
  Li      = lim_inf_int,
  Ls      = lim_sup_int,
  MC      = (lim_inf_int + lim_sup_int) / 2,
  ni      = ni_int,
  hi      = hi_int,
  Ni_asc  = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc  = Hi_asc_int,
  Hi_desc = Hi_desc_int
)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

TDF_Dec_Final <- data.frame(
  Li      = as.character(round(TDF_Decimal$Li, 2)),
  Ls      = as.character(round(TDF_Decimal$Ls, 2)),
  MC      = as.character(round(TDF_Decimal$MC, 2)),
  ni      = as.character(TDF_Decimal$ni),
  hi      = as.character(round(TDF_Decimal$hi, 2)),
  Ni_asc  = as.character(TDF_Decimal$Ni_asc),
  Ni_desc = as.character(TDF_Decimal$Ni_desc),
  Hi_asc  = as.character(round(TDF_Decimal$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Decimal$Hi_desc, 2))
)

totales_dec   <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni),
                   round(sum(TDF_Decimal$hi), 2), "-", "-", "-", "-")
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

TDF_Dec_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Latitud (°) de las Unidades Petroleras**")) %>%
  tab_source_note(source_note = "Autor: Tu Nombre") %>%
  cols_label(
    Li      = "Lim. Inf",
    Ls      = "Lim. Sup",
    MC      = "Marca Clase",
    ni      = "Frec. Abs (ni)",
    hi      = "Frec. Rel (%)",
    Ni_asc  = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc  = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14),
              column_labels.background.color = "#F0F0F0")
Tabla N°1 de Distribución de Frecuencias de Latitud (°) de las Unidades Petroleras
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-53.97 -44.17 -49.07 79 1.05 79 7537 1.05 100
-44.17 -34.37 -39.27 125 1.66 204 7458 2.71 98.95
-34.37 -24.57 -29.47 53 0.7 257 7333 3.41 97.29
-24.57 -14.77 -19.67 99 1.31 356 7280 4.72 96.59
-14.77 -4.97 -9.87 162 2.15 518 7181 6.87 95.28
-4.97 4.83 -0.07 373 4.95 891 7019 11.82 93.13
4.83 14.63 9.73 528 7.01 1419 6646 18.83 88.18
14.63 24.43 19.53 269 3.57 1688 6118 22.4 81.17
24.43 34.23 29.33 2407 31.94 4095 5849 54.33 77.6
34.23 44.03 39.13 782 10.38 4877 3442 64.71 45.67
44.03 53.83 48.93 1614 21.41 6491 2660 86.12 35.29
53.83 63.63 58.73 904 11.99 7395 1046 98.12 13.88
63.63 73.43 68.53 142 1.88 7537 142 100 1.88
TOTAL - - 7537 100 - - - -
Autor: Tu Nombre

3.2 Tabla con Límites Enteros

TDF_Int_Final <- data.frame(
  Li      = as.character(TDF_Enteros$Li),
  Ls      = as.character(TDF_Enteros$Ls),
  MC      = as.character(TDF_Enteros$MC),
  ni      = as.character(TDF_Enteros$ni),
  hi      = as.character(round(TDF_Enteros$hi, 2)),
  Ni_asc  = as.character(TDF_Enteros$Ni_asc),
  Ni_desc = as.character(TDF_Enteros$Ni_desc),
  Hi_asc  = as.character(round(TDF_Enteros$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Enteros$Hi_desc, 2))
)

totales_int   <- c("TOTAL", "-", "-", sum(TDF_Enteros$ni),
                   round(sum(TDF_Enteros$hi), 2), "-", "-", "-", "-")
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

TDF_Int_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°2 de Distribución de Frecuencias de Latitud (°) de las Unidades Petroleras**")) %>%
  tab_source_note(source_note = "Autor: Tu Nombre") %>%
  cols_label(
    Li      = "Lim. Inf",
    Ls      = "Lim. Sup",
    MC      = "Marca Clase",
    ni      = "Frec. Abs (ni)",
    hi      = "Frec. Rel (%)",
    Ni_asc  = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc  = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14),
              column_labels.background.color = "#F0F0F0")
Tabla N°2 de Distribución de Frecuencias de Latitud (°) de las Unidades Petroleras
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-60 -40 -50 79 1.05 79 7537 1.05 100
-40 -20 -30 248 3.29 327 7458 4.34 98.95
-20 0 -10 309 4.1 636 7210 8.44 95.66
0 20 10 956 12.68 1592 6901 21.12 91.56
20 40 30 2971 39.42 4563 5945 60.54 78.88
40 60 50 2666 35.37 7229 2974 95.91 39.46
60 80 70 308 4.09 7537 308 100 4.09
TOTAL - - 7537 100 - - - -
Autor: Tu Nombre

4 Análisis Gráfico

4.1 Histogramas de Cantidad

par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni,
        names.arg = TDF_Enteros$MC,
        main      = "",
        xlab      = "",
        ylab      = "",
        col       = "#FF6961",
        ylim      = c(0, max(TDF_Enteros$ni) * 1.2),
        space     = 0,
        las       = 2,
        cex.names = 0.7)
mtext("Cantidad",      side = 2, line = 4.5, cex = 1, font = 1)
mtext("Latitud (\u00b0)", side = 1, line = 4,   cex = 0.9)
mtext("Gráfica N\u00b01: Distribución de Cantidad de Unidades Petroleras por Latitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)

par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni,
        names.arg = TDF_Enteros$MC,
        main      = "",
        xlab      = "",
        ylab      = "",
        col       = "#FF6961",
        ylim      = c(0, max(TDF_Enteros$ni) * 1.2),
        space     = 0,
        las       = 2,
        cex.names = 0.7)
mtext("Cantidad",         side = 2, line = 4.5, cex = 1,   font = 1)
mtext("Latitud (\u00b0)", side = 1, line = 4,   cex = 0.9)
mtext("Gráfica N\u00b02: Distribución de Cantidad de Unidades Petroleras por Latitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)

4.2 Histogramas Porcentuales

par(mar = c(8, 5, 5, 2))
bp3 <- barplot(TDF_Enteros$hi,
               main      = "",
               xlab      = "",
               ylab      = "Porcentaje (%)",
               col       = "#FF6961",
               ylim      = c(0, max(TDF_Enteros$hi) * 1.3),
               space     = 0,
               names.arg = TDF_Enteros$MC,
               cex.names = 0.7,
               las       = 2)
mtext("Latitud (\u00b0)", side = 1, line = 4, cex = 0.9)
mtext("Gráfica N\u00b03: Distribución Porcentual de las Unidades Petroleras por Latitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)
text(x      = bp3,
     y      = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos    = 3,
     cex    = 0.6,
     col    = "black")

par(mar = c(8, 5, 5, 2))
bp4 <- barplot(TDF_Enteros$hi,
               main      = "",
               xlab      = "",
               ylab      = "Porcentaje (%)",
               col       = "#FF6961",
               space     = 0,
               names.arg = TDF_Enteros$MC,
               cex.names = 0.7,
               las       = 2,
               ylim      = c(0, 100))
mtext("Latitud (\u00b0)", side = 1, line = 4, cex = 0.9)
mtext("Gráfica N\u00b04: Distribución Porcentual de las Unidades Petroleras por Latitud",
      side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2)
text(x      = bp4,
     y      = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos    = 3,
     cex    = 0.6,
     col    = "black")

4.3 Diagrama de Cajas (Boxplot)

par(mar = c(5, 5, 4, 2))
boxplot(Variable,
        horizontal = TRUE,
        col        = "#FF6961",
        xlab       = "Latitud (\u00b0)",
        cex.main   = 0.9,
        main       = "Gráfica N\u00b05: Distribución de la Latitud en las Unidades Petroleras")

4.4 Ojivas

par(mar = c(5, 5, 7, 10), xpd = TRUE)

x_asc  <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc  <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Latitud (\u00b0)",
     ylab = "Frecuencia Acumulada",
     col  = "black",
     pch  = 19,
     xlim = c(min(TDF_Enteros$Li), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)))

lines(x_desc, y_desc, col = "red", type = "b", pch = 19)

grid()
mtext("Gráfica N\u00b06: Ojivas Ascendentes y Descendentes de la\nDistribución de la Latitud en las Unidades Petroleras",
      side = 3, line = 3, adj = 0.5, cex = 0.9, font = 2)
legend("right",
       inset  = c(-0.2, 0),
       legend = c("Ascendente", "Descendente"),
       col    = c("black", "red"),
       lty    = 1,
       pch    = 1,
       cex    = 0.6,
       bty    = "n")


5 Indicadores Estadísticos

## TENDENCIA CENTRAL
media   <- round(mean(Variable), 2)
mediana <- round(median(Variable), 2)

max_frecuencia <- max(TDF_Enteros$ni)
moda_vals      <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt       <- paste(round(moda_vals, 2), collapse = ", ")

## DISPERSIÓN
varianza <- var(Variable)
sd_val   <- sd(Variable)
cv       <- round((sd_val / abs(media)) * 100, 2)

## FORMA
asimetria <- skewness(Variable, type = 2)
curtosis  <- kurtosis(Variable)

## OUTLIERS
Q1          <- quantile(Variable, 0.25)
Q3          <- quantile(Variable, 0.75)
IQR_val     <- Q3 - Q1
lim_inf_out <- Q1 - 1.5 * IQR_val
lim_sup_out <- Q3 + 1.5 * IQR_val

outliers_data <- Variable[Variable < lim_inf_out | Variable > lim_sup_out]
num_outliers  <- length(outliers_data)

if (num_outliers > 0) {
  rango_outliers <- paste0(num_outliers, " [",
                           round(min(outliers_data), 2), "; ",
                           round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

tabla_indicadores <- data.frame(
  Variable     = "Latitud (\u00b0)",
  Rango_MinMax = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
  X            = c(media),
  Me           = c(mediana),
  Mo           = c(moda_txt),
  V            = c(varianza),
  Sd           = c(sd_val),
  Cv           = c(cv),
  As           = c(asimetria),
  K            = c(curtosis),
  Outliers     = rango_outliers
)

tabla_indicadores %>%
  gt() %>%
  tab_header(title = md("**Tabla N°3 de Indicadores Estadísticos de Latitud (°) de las Unidades Petroleras**")) %>%
  tab_source_note(source_note = "Autor: Tu Nombre") %>%
  cols_label(
    Variable     = "Variable",
    Rango_MinMax = "Rango",
    X            = "Media (X)",
    Me           = "Mediana (Me)",
    Mo           = "Moda (Mo)",
    V            = "Varianza (V)",
    Sd           = "Desv. Est. (Sd)",
    Cv           = "C.V. (%)",
    As           = "Asimetría (As)",
    K            = "Curtosis (K)",
    Outliers     = "Outliers [Intervalo]"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(
    heading.title.font.size        = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°3 de Indicadores Estadísticos de Latitud (°) de las Unidades Petroleras
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Latitud (°) [-53.97; 73.43] 32.25 32.53 30 521.1647 22.82903 70.79 -1.22218 1.697479 430 [-53.97; -7.66]
Autor: Tu Nombre