# --- 1. IMPORT DATA ---
# NOTE(review): setwd() with a hard-coded absolute path ties the script to one
# machine layout. It is kept as-is because other parts of the script may rely
# on the working directory being changed here.
setwd("/cloud/project/datos")
# Parse the file as semicolon-separated, "." as decimal mark, first row as
# column names; short rows are padded (fill = TRUE).
datos <- read.csv(
  file = "Petroleo_Ontaro.csv",
  sep = ";",
  dec = ".",
  header = TRUE,
  fill = TRUE
)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 1. Base selection: keep rows whose WELL_TYPE is "Natural Gas Well" and whose
#    GROUND_ELEVATION and TOTAL_DEPTH are both present and strictly positive.
datos_base <- datos %>%
  filter(WELL_TYPE == "Natural Gas Well") %>%
  filter(!is.na(GROUND_ELEVATION), GROUND_ELEVATION > 0) %>%
  filter(!is.na(TOTAL_DEPTH), TOTAL_DEPTH > 0)
# 2. AGGRESSIVE FILTERING (keep only the main cluster of points)
# A preliminary quadratic fit of depth on elevation is used as the reference
# curve for the filter.
# NOTE(review): rows are selected by how close they lie to this quadratic fit,
# and the same model form is refitted on the selected rows afterwards, so the
# R² reported later is inflated by construction. Confirm this is intended.
modelo_temp <- lm(
  TOTAL_DEPTH ~ poly(GROUND_ELEVATION, 2, raw = TRUE),
  data = datos_base
)
# Absolute residual of each row with respect to the preliminary fit.
datos_base[["distancia"]] <- abs(resid(modelo_temp))
# 10th percentile of those absolute residuals.
umbral_estricto <- quantile(datos_base[["distancia"]], probs = 0.10)
# Keep only rows strictly below the threshold.
datos_ultra_limpios <- filter(datos_base, distancia < umbral_estricto)
# 3. Final model: quadratic fit of depth on elevation, using the filtered rows.
modelo_poly <- lm(
  TOTAL_DEPTH ~ poly(GROUND_ELEVATION, 2, raw = TRUE),
  data = datos_ultra_limpios
)
# Coefficient of determination of the final fit (shown in the plot subtitle).
r2_poly <- summary(modelo_poly)[["r.squared"]]
# 4. Plot: filtered observations plus the fitted quadratic curve.
# Fitted depth for every retained row; used as the y-values of the curve.
datos_ultra_limpios$prediccion <- predict(
  modelo_poly,
  newdata = datos_ultra_limpios
)
ggplot(datos_ultra_limpios, aes(x = GROUND_ELEVATION, y = TOTAL_DEPTH)) +
  # Observed points
  geom_point(color = "black", alpha = 0.6, size = 2) +
  # Polynomial curve. `linewidth` is used instead of `size`, which ggplot2
  # deprecated for lines in version 3.4.0.
  geom_line(aes(y = prediccion), color = "purple", linewidth = 2) +
  labs(
    title = "Regresión Polinómica (Filtro de Acumulación)",
    subtitle = paste(" R² =", round(r2_poly, 3)),
    x = "Elevación (m)",
    y = "Profundidad (m)"
  ) +
  theme_minimal()
