ANÁLISIS ESTADÍSTICO

1. CARGA DE LIBRERÍAS Y DATOS

#=========================ENCABEZADO================================
# TEMA: REGRESION EXPONENCIAL
# AUTOR: GRUPO 3
# FECHA: 03-2026
#===================================================================
library(dplyr)
library(knitr)
library(gt)

# Load the source CSV through an explicit path instead of calling setwd(),
# so the session's working directory is never changed as a side effect.
# The file uses ";" as the field separator and "," as the decimal mark.
dir_datos <- "C:/Users/HP/Documents/PROYECTO ESTADISTICA/RStudio"
datos <- read.csv(
  file.path(dir_datos, "tablap.csv"),
  header = TRUE, sep = ";", dec = ","
)

2. TABLA PARES DE VALORES

# Extract the two study variables as numeric vectors.
area_pozo  <- as.numeric(datos$Area.of.well.pads)
produccion <- as.numeric(datos$Total.gas.production.by.2023)

# Build the initial table of value pairs (TPV) and clean it: drop rows with
# NA, then keep only rows where both variables are strictly positive.
TPV <- data.frame(area_pozo = area_pozo, produccion = produccion)
TPV <- na.omit(TPV)
TPV <- TPV[TPV$area_pozo > 0 & TPV$produccion > 0, ]

# --- OUTLIER REMOVAL (PER THE GUIDE) ---
# Fences at Q1 - 1.5 * IQR and Q3 + 1.5 * IQR, computed separately for x and y.
Q1_x <- quantile(TPV$area_pozo, 0.25); Q3_x <- quantile(TPV$area_pozo, 0.75)
IQR_x <- Q3_x - Q1_x
lim_inf_x <- Q1_x - 1.5 * IQR_x; lim_sup_x <- Q3_x + 1.5 * IQR_x

Q1_y <- quantile(TPV$produccion, 0.25); Q3_y <- quantile(TPV$produccion, 0.75)
IQR_y <- Q3_y - Q1_y
lim_inf_y <- Q1_y - 1.5 * IQR_y; lim_sup_y <- Q3_y + 1.5 * IQR_y

# Keep only the rows that fall inside both pairs of fences.
TPV_limpio <- TPV[
  TPV$area_pozo >= lim_inf_x & TPV$area_pozo <= lim_sup_x &
    TPV$produccion >= lim_inf_y & TPV$produccion <= lim_sup_y, 
]

# Sort the table by ascending well-pad area.
TPV_limpio <- TPV_limpio[order(TPV_limpio$area_pozo), ]

# --- RESET ROW NUMBERING ---
row.names(TPV_limpio) <- NULL

# --- SHOW TABLE (first 20 records, rendered with gt) ---
tabla_tpv_previa <- head(TPV_limpio, 20)
# seq_len() instead of 1:nrow(): with zero rows 1:0 is c(1, 0), which would
# make cbind() fail on mismatched row counts.
tabla_tpv_previa <- cbind(
  Nro = seq_len(nrow(tabla_tpv_previa)),
  tabla_tpv_previa
)

tabla_tpv_previa %>% 
  gt() %>%
  cols_label(Nro = "N°", area_pozo = "Área Pozo (x)", produccion = "Producción Gas (y)") %>%
  tab_header(title = md("**Tabla N° 4. Pares de Valores: Área vs. Producción**")) %>%
  cols_align(align = "center") %>%
  tab_options(table.width = pct(80), column_labels.font.weight = "bold")

N°	Área Pozo (x)	Producción Gas (y)
Tabla N° 4. Pares de Valores: Área vs. Producción
1	2804	59263
2	2807	58319
3	2821	58334
4	2822	108411
5	2823	80571
6	2824	57084
7	2827	58521
8	2839	74661
9	2850	68191
10	2868	77172
11	2870	50308
12	2871	69337
13	2875	72314
14	2879	51300
15	2880	86043
16	2880	93692
17	2886	68459
18	2891	42390
19	2898	50339
20	2898	68475

# Final variables for the model (100% of the cleaned data).
x <- TPV_limpio$area_pozo
y <- TPV_limpio$produccion

# Random 5% subsample, used only for visual representation.
# sample.int() with a clamped size avoids the pitfalls of
# sample(1:nrow(...), nrow(...) / 20): an empty sample when there are fewer
# than 20 rows, and sampling from c(1, 0) when there are none. For 20 or more
# rows the draw is the same as before under the same seed.
set.seed(123)
n_filas <- nrow(TPV_limpio)
n_visual <- min(n_filas, max(1L, n_filas %/% 20L))
indice_visual <- sample.int(n_filas, size = n_visual)

3. DIAGRAMA DE DISPERSIÓN

# Scatter plot of the points selected by indice_visual: well-pad area on the
# horizontal axis, total gas production on the vertical axis.
plot(
  x = x[indice_visual],
  y = y[indice_visual],
  main = "Gráfica N°1: Diagrama de dispersión \n Área de pozo vs Producción de Gas",
  xlab = "Área de plataformas de pozo",
  ylab = "Producción total de gas (2023)",
  col = "blue",
  pch = 16
)

4. CONJETURA DE MODELO

# Log-transform the dependent variable so that the exponential model
# y = a * exp(b * x) becomes the linear model log(y) = log(a) + b * x.
y1 <- log(y)

# Estimate the parameters by ordinary least squares on the linearised model
# (uses 100% of the cleaned data).
regresion_exponencial <- lm(y1 ~ x)
beta0 <- coef(regresion_exponencial)[1]
beta1 <- coef(regresion_exponencial)[2]

a <- exp(beta0) # Back-transformed intercept
b <- beta1      # Growth rate

# Plot reality vs. model (only the points selected by indice_visual are drawn).
plot(x[indice_visual], y[indice_visual], 
     pch = 16, col = "blue",
     main = "Gráfica N°2: Comparación de la realidad con el modelo exponencial",
     xlab = "Área de plataformas de pozo", 
     ylab = "Producción de gas")

# Overlay the fitted exponential curve across the observed range of x.
curve(a * exp(b * x), from = min(x), to = max(x), add = TRUE, col = "red", lwd = 2)

# Show the fitted equation on the plot. The slope is printed with three
# significant digits rather than three decimals: a small slope rounded with
# round(b, 3) collapses to 0.001 (or 0), so the printed equation would no
# longer reproduce the model's predictions.
eq_text <- paste0(
  "Ecuación exponencial: Y = ", round(a, 2),
  "e^(", format(signif(b, 3), scientific = FALSE), "x)"
)
mtext(eq_text, side = 3, line = -2, cex = 1.2, col = "red", font = 2)

5. TEST DE APROBACIÓN Y RESTRICCIONES

# Pearson correlation between x and the log-transformed response y1.
r <- cor(x, y1)

# One-row indicator table; the coefficient is shown as a percentage rounded
# to two decimals.
r_porcentaje <- paste0(round(r * 100, 2), " %")
tabla_tests <- data.frame(
  Indicador = "Coeficiente de Pearson (r)",
  Valor = r_porcentaje
)

# Build the gt table step by step, then print it.
tabla_tests_gt <- gt(tabla_tests)
tabla_tests_gt <- tab_header(
  tabla_tests_gt,
  title = md("**Test de Aprobación del Modelo Exponencial**")
)
tabla_tests_gt <- cols_align(tabla_tests_gt, align = "center")
tabla_tests_gt <- tab_options(
  tabla_tests_gt,
  table.width = pct(60),
  column_labels.font.weight = "bold"
)
tabla_tests_gt

Indicador	Valor
Test de Aprobación del Modelo Exponencial
Coeficiente de Pearson (r)	96.25 %

# Draw a standalone "card" listing the model restrictions on an empty canvas
# with a 0-100 coordinate system on both axes.
color_borde <- "#D9534F"

plot.new()
plot.window(xlim = c(0, 100), ylim = c(0, 100))

# Card title.
text(50, 85, "RESTRICCIONES DEL MODELO", cex = 1.4, font = 2, col = color_borde)

# Body text: three lines joined with explicit newlines (the trailing spaces
# before each line break are part of the string).
parrafo_1 <- paste(
  "El modelo exponencial no permite valores de Y iguales o ",
  "menores a cero. La confiabilidad se limita estrictamente ",
  "al rango de datos observados.",
  sep = "\n"
)

text(50, 55, parrafo_1, cex = 1.1, font = 3, col = "black")

# Frame around the card.
rect(2, 5, 98, 95, border = color_borde, lwd = 3)

6. CÁLCULO DE PRONÓSTICOS

# Forecast: evaluate the fitted exponential model at a chosen well-pad area.
area_test <- 10 
T_Esp <- a * exp(b * area_test)

# The model was fitted on x only, so a forecast outside the observed range of
# x is an extrapolation. Warn instead of silently presenting it as reliable.
rango_x <- range(x)
if (area_test < rango_x[1] || area_test > rango_x[2]) {
  warning(
    "area_test = ", area_test, " está fuera del rango observado [",
    rango_x[1], ", ", rango_x[2], "]; el pronóstico es una extrapolación.",
    call. = FALSE
  )
}

# Draw the forecast "card" on an empty canvas (x in 0-100, y in 0-1).
plot.new()
plot.window(xlim = c(0, 100), ylim = c(0, 1))

# Grey background of the answer box, then the title above it.
rect(10, 0.4, 90, 0.6, col = "#E5E7E9", border = NA)
text(50, 0.85, "PRONÓSTICO DEL MODELO EXPONENCIAL", cex = 1.5, font = 2, col = "#2A9D8F")

# The question being answered.
texto_pregunta <- paste0("¿Cuál sería la producción si se tiene un área de pozo de ", area_test, "?")
text(50, 0.75, texto_pregunta, cex = 1.1, font = 3)

# The answer, then the box border drawn last so it sits on top of the fill.
text(50, 0.5, paste0("R: ", round(T_Esp, 2)), cex = 1.6, font = 2, col = "#1F618D")
rect(10, 0.4, 90, 0.6, border = "#2A9D8F", lwd = 2)

7. CONCLUSIÓN

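## Entre el área de pozo y la producción de gas existe una relación exponencial representada por el modelo 
## f(x) = 13588.53e^(0.000588x). Ejemplo: con un área de 10, el modelo predice una producción de 13668.72.
## Nota: un área de 10 está fuera del rango de áreas observadas, por lo que este valor es una extrapolación.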

ANÁLISIS DE POZOS DE GAS NATURAL EN NUEVO MÉXICO: UN ENFOQUE BASADO EN DATOS

Grupo Nro. 3

03-2026

ANÁLISIS ESTADÍSTICO

1. CARGA DE LIBRERÍAS Y DATOS

2. TABLA PARES DE VALORES

3. DIAGRAMA DE DISPERSIÓN

4. CONJETURA DE MODELO

5. TEST DE APROBACIÓN Y RESTRICCIONES

6. CÁLCULO DE PRONÓSTICOS

7. CONCLUSIÓN