# --- 1. LOAD DATA ----
# Switch into the data directory and read the semicolon-separated CSV with a
# header row and "." as the decimal mark; `fill = TRUE` pads short rows.
# NOTE(review): setwd() changes the working directory for the rest of the
# session and ties the script to this machine's layout -- consider passing a
# full path to read.csv() instead, once nothing else depends on the cwd.
setwd("/cloud/project/datos")
datos <- read.csv(
  file = "Petroleo_Ontaro.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  fill = TRUE
)


library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# 1. Base selection
# Keep only rows labelled "Natural Gas Well" whose ground elevation and total
# depth are both present and strictly positive. Conditions passed to filter()
# are AND-ed together, and rows where a condition is NA are dropped.
datos_base <- filter(
  datos,
  WELL_TYPE == "Natural Gas Well",
  !is.na(GROUND_ELEVATION) & GROUND_ELEVATION > 0,
  !is.na(TOTAL_DEPTH) & TOTAL_DEPTH > 0
)

# 2. AGGRESSIVE FILTERING (keep only the main accumulation)
# Fit a provisional quadratic of total depth on ground elevation, then store
# each row's absolute residual as its distance from that provisional curve.
modelo_temp <- lm(
  TOTAL_DEPTH ~ poly(GROUND_ELEVATION, 2, raw = TRUE),
  data = datos_base
)
datos_base$distancia <- abs(resid(modelo_temp))

# Retain only the rows whose distance is strictly below the 10th percentile.
# NOTE(review): selecting points by closeness to a fitted curve and then
# refitting the same model form on them makes the later R^2 optimistic by
# construction -- confirm this is the intended methodology.
umbral_estricto <- quantile(datos_base$distancia, probs = 0.10)
datos_ultra_limpios <- filter(datos_base, distancia < umbral_estricto)

# 3. Final model on the filtered data
# Refit the same raw quadratic on the retained rows and extract the
# coefficient of determination from the model summary.
modelo_poly <- lm(
  TOTAL_DEPTH ~ poly(GROUND_ELEVATION, 2, raw = TRUE),
  data = datos_ultra_limpios
)
r2_poly <- summary(modelo_poly)[["r.squared"]]

# 4. Plot
# Evaluate the final model on the same rows it was fitted to, so the curve
# can be drawn from a column of the plotting data frame.
datos_ultra_limpios$prediccion <- predict(
  modelo_poly,
  newdata = datos_ultra_limpios
)

# Scatter of the filtered wells with the fitted quadratic on top; the
# subtitle reports the final model's R^2 rounded to three decimals.
ggplot(datos_ultra_limpios, aes(x = GROUND_ELEVATION, y = TOTAL_DEPTH)) +
  # Points
  geom_point(color = "black", alpha = 0.6, size = 2) +
  # Polynomial curve. Line thickness is set with `linewidth`: using `size`
  # for lines was deprecated in ggplot2 3.4.0 and raised a warning.
  geom_line(aes(y = prediccion), color = "purple", linewidth = 2) +
  labs(title = "Regresión Polinómica (Filtro de Acumulación)",
       subtitle = paste(" R² =", round(r2_poly, 3)),
       x = "Elevación (m)", y = "Profundidad (m)") +
  theme_minimal()