#=============================ENCABEZADO========================================
# TEMA: ED Variables Cuantitativas - ANIO DE INICIO DE LA PERFORACION
# AUTOR: GRUPO 4
# FECHA: 18-12-2025
#===============================CARGA DE DATOS==================================
# Session setup: set the `encoding` option, attach the packages used below,
# and load the data set into `datos`.
options(encoding = "UTF-8")
library(gt)
library(dplyr)
## (Console output captured when dplyr was attached; kept for reference.)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
##     filter, lag
## The following objects are masked from 'package:base':
##
##     intersect, setdiff, setequal, union
library(e1071)
# NOTE(review): hard-coded, machine-specific working directory. The script only
# runs unchanged where this exact path exists -- confirm before sharing it.
setwd("C:/Users/HP/Documents/PROYECTO ESTADISTICA/RStudio")
# Read the CSV: first row is the header, fields are separated by ";",
# decimals use ",", and the file is decoded as UTF-8.
datos <- read.csv("tablap.csv", header = TRUE, dec = ",", sep = ";", fileEncoding = "UTF-8")
# Mode helper.
#
# Returns the most frequent value of `v`. When several values tie for the
# highest count, the one that appears first in `v` is returned.
#
# Args:
#   v:     Atomic vector (numeric, character, ...).
#   na.rm: If TRUE, missing values are dropped before counting. Defaults to
#          FALSE, in which case NA is counted like any other value and can be
#          returned as the mode.
#
# Returns:
#   A length-one vector of the same type as `v` (length zero if `v` is empty).
get_mode <- function(v, na.rm = FALSE) {
  if (na.rm) {
    v <- v[!is.na(v)]
  }
  uniqv <- unique(v)
  # match() maps each element to its index in `uniqv`; tabulate() counts those
  # indices, and which.max() picks the first index with the largest count.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

#===========================PROCESAMIENTO DE LA VARIABLE======================
# Build the decade-grouped frequency table for the `Spud.Initiation.year`
# column of `datos`.
# NOTE(review): the column is assumed to be numeric years -- confirm.
a_perforacion <- datos$Spud.Initiation.year

# First year of the decades that contain the smallest and largest observed
# year (missing values ignored).
min_year <- min(a_perforacion, na.rm = TRUE)
max_year <- max(a_perforacion, na.rm = TRUE)
start_decade <- floor(min_year / 10) * 10
# floor(), not ceiling(): with ceiling(), a maximum that is not a multiple of
# 10 (e.g. 2025) pushed the last break one decade too far and produced an
# empty trailing class. For a maximum that is a multiple of 10 the result is
# the same as before.
end_decade <- floor(max_year / 10) * 10

# One break per decade start, plus the upper edge that closes the last decade.
breaks <- seq(start_decade, end_decade + 10, by = 10)
# Labels of the form "1950-1959": lower edge and (upper edge - 1).
labels <- paste0(breaks[-length(breaks)], "-", breaks[-1] - 1)

# right = FALSE makes the classes left-closed: [1950, 1960), [1960, 1970), ...
a_perforacion_decadas <- cut(a_perforacion, breaks = breaks, labels = labels, right = FALSE)
# table() drops NA by default, so `ni` counts only non-missing years.
TDF_base <- as.data.frame(table(a_perforacion_decadas))
names(TDF_base) <- c("Decada", "ni")

# Relative frequency (%) and ascending / descending cumulative frequencies.
hi <- (TDF_base$ni / sum(TDF_base$ni)) * 100
Niasc <- cumsum(TDF_base$ni)
Hiasc <- cumsum(hi)
Nidsc <- rev(cumsum(rev(TDF_base$ni)))
Hidsc <- rev(cumsum(rev(hi)))

# Percentages are rounded to 2 decimals for display only; the cumulative sums
# above are computed from the unrounded values.
Tabla_Final <- data.frame(
  N = seq_len(nrow(TDF_base)),
  Decada = as.character(TDF_base$Decada),
  ni = TDF_base$ni,
  hi = round(hi, 2),
  Niasc = Niasc,
  Hiasc = round(Hiasc, 2),
  Nidsc = Nidsc,
  Hidsc = round(Hidsc, 2)
)

# Summary row; cumulative columns are left as NA for the TOTAL row.
Fila_Total <- data.frame(
  N = NA, Decada = "TOTAL", ni = sum(Tabla_Final$ni), hi = 100,
  Niasc = NA, Hiasc = NA, Nidsc = NA, Hidsc = NA
)
TDF_Completa <- rbind(Tabla_Final, Fila_Total)

# =========================TABLA PROFESIONAL (GT)======================

# Render the frequency table with gt: relabelled headers, blank cells in place
# of NA, a shaded bold TOTAL row, and centered columns.
tabla_gt_resultado <- gt(TDF_Completa) %>%
  tab_header(title = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION") %>%
  cols_label(
    N = "N",
    Decada = "RANGO",
    ni = "ni",
    hi = "hi",
    Niasc = "Ni_asc",
    Hiasc = "Hi_asc",
    Nidsc = "Ni_des",
    Hidsc = "Hi_des"
  ) %>%
  sub_missing(columns = everything(), missing_text = "") %>%
  tab_style(
    style = list(
      cell_fill(color = "gray95"),
      cell_text(weight = "bold")
    ),
    locations = cells_body(rows = Decada == "TOTAL")
  ) %>%
  cols_align(align = "center", columns = everything())

# Top-level auto-print displays the table.
tabla_gt_resultado
## DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION
## N RANGO ni hi Ni_asc Hi_asc Ni_des Hi_des
## 1 1950-1959 1130 9.00 1130 9.00 12561 100.00
## 2 1960-1969 873 6.95 2003 15.95 11431 91.00
## 3 1970-1979 1416 11.27 3419 27.22 10558 84.05
## 4 1980-1989 1741 13.86 5160 41.08 9142 72.78
## 5 1990-1999 1687 13.43 6847 54.51 7401 58.92
## 6 2000-2009 5223 41.58 12070 96.09 5714 45.49
## 7 2010-2019 477 3.80 12547 99.89 491 3.91
## 8 2020-2029 14 0.11 12561 100.00 14 0.11
##
## TOTAL 12561 100.00



#============================GRAFICOS=========================================
# Bar charts of the decade frequencies. The bottom margin is enlarged so the
# vertical (las = 2) decade labels fit under the axis.
par(mar = c(8, 4, 4, 2) + 0.1)

# ---- Absolute frequency, local scale (y-axis fitted to the data) ----
barplot(
  height = Tabla_Final[["ni"]],
  names.arg = Tabla_Final[["Decada"]],
  main = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION",
  ylab = "ni",
  col = "skyblue",
  las = 2
)

# ---- Absolute frequency, global scale (y-axis from 0 to the total count) ----
barplot(
  height = Tabla_Final[["ni"]],
  names.arg = Tabla_Final[["Decada"]],
  main = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION",
  ylab = "ni",
  ylim = c(0, sum(Tabla_Final[["ni"]])),
  col = "darkgrey",
  las = 2
)

# ---- Relative frequency, local scale (y-axis fitted to the data) ----
barplot(
  height = Tabla_Final[["hi"]],
  names.arg = Tabla_Final[["Decada"]],
  main = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION",
  ylab = "hi (%)",
  col = "lightgreen",
  las = 2
)

# ---- Relative frequency, global scale (y-axis from 0 to 100 %) ----
barplot(
  height = Tabla_Final[["hi"]],
  names.arg = Tabla_Final[["Decada"]],
  main = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION",
  ylab = "hi (%)",
  ylim = c(0, 100),
  col = "grey50",
  las = 2
)

# OJIVAS 5-6 (REORDENADAS)
# Ogives: ascending and descending cumulative frequencies per decade.
# One x position per class; the decade labels are drawn manually with axis().
x_ojiva <- seq_len(nrow(Tabla_Final))

# ---- Absolute ogive (Ni) ----
# ylim must cover BOTH series. Without it the axis is fitted to Niasc only,
# and small Nidsc values (the counts of the last classes) fall below the
# plotting region and are silently not drawn.
plot(x_ojiva, Tabla_Final$Niasc, type = "p", pch = 19, col = "blue",
     main = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION",
     xlab = "Decada", ylab = "Cantidad Acumulada", xaxt = "n",
     ylim = range(0, Tabla_Final$Niasc, Tabla_Final$Nidsc))
axis(side = 1, at = x_ojiva, labels = Tabla_Final$Decada, las = 2, cex.axis = 0.7)
points(x_ojiva, Tabla_Final$Nidsc, col = "darkred", pch = 18)
# Legend at "right", as in the relative ogive: at "topright" it sat on top of
# the last ascending point, which is always the maximum.
legend("right", legend = c("Ni Asc", "Ni Des"), col = c("blue", "darkred"), pch = c(19, 18), bty = "n")

# ---- Relative ogive (Hi) ----
# Both series are percentages, so a fixed 0-100 axis shows every point.
plot(x_ojiva, Tabla_Final$Hiasc, type = "p", pch = 19, col = "blue",
     main = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION",
     xlab = "Decada", ylab = "Porcentaje Acumulado (%)", xaxt = "n", ylim = c(0, 100))
axis(side = 1, at = x_ojiva, labels = Tabla_Final$Decada, las = 2, cex.axis = 0.7)
points(x_ojiva, Tabla_Final$Hidsc, col = "darkred", pch = 18)
legend("right", legend = c("Hi Asc", "Hi Des"), col = c("blue", "darkred"), pch = c(19, 18), bty = "n")

#================================BOXPLOT 7===================================

# Horizontal boxplot of the raw (ungrouped) years.
# The x label was stored as the mis-encoded text "A??o"; the \u00f1 escape
# yields the intended n-with-tilde regardless of the source file's encoding.
boxplot(a_perforacion, horizontal = TRUE, col = "lightblue",
        main = "DISTRIBUCION DE ANIO DE INICIO DE LA PERFORACION", xlab = "A\u00f1o")

#======================INDICADORES ESTADISTICOS=============================

# Summary indicators of the raw years, all computed with missing values
# removed and rounded to 2 decimals for display.
mean_val <- mean(a_perforacion, na.rm = TRUE)
sd_val <- sd(a_perforacion, na.rm = TRUE)

indicadores_df <- data.frame(
  Indicador = c("Moda", "Mediana", "Media", "Desviacion Estandar",
                "Varianza", "Coef. Variacion (%)", "Asimetria", "Curtosis"),
  Valor = round(
    c(
      # NA is dropped here as well: get_mode() counts NA like any other value,
      # so without this the mode could be NA while every other indicator
      # ignores missing data.
      get_mode(a_perforacion[!is.na(a_perforacion)]),
      median(a_perforacion, na.rm = TRUE),
      mean_val,
      sd_val,
      var(a_perforacion, na.rm = TRUE),
      # Coefficient of variation, as a percentage of the mean.
      (sd_val / mean_val) * 100,
      # NOTE(review): skewness()/kurtosis() presumably come from e1071,
      # attached at the top of the script -- confirm.
      skewness(a_perforacion, na.rm = TRUE),
      kurtosis(a_perforacion, na.rm = TRUE)
    ),
    2
  )
)

# Top-level auto-print displays the indicator table.
gt(indicadores_df) %>%
  tab_header(title = "INDICADORES ESTADISTICOS GENERALES")
## INDICADORES ESTADISTICOS GENERALES
## Indicador Valor
## Moda 2006.00
## Mediana 1997.00
## Media 1990.09
## Desviacion Estandar 17.33
## Varianza 300.28
## Coef. Variacion (%) 0.87
## Asimetria -0.69
## Curtosis -0.71