# --- 1. IMPORTAR DATOS ---
setwd("/cloud/project/datos")
datos <- read.csv("Petroleo_Ontaro.csv", header = TRUE, sep = ";", dec = ".", fill = TRUE)

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# --- 2. Filtrar el subconjunto ---
datos_log <- datos %>%
  filter(COUNTY == "Lincoln",
         WELL_TYPE == "Private Gas Well",
         !is.na(GROUND_ELEVATION), !is.na(TOTAL_DEPTH),
         GROUND_ELEVATION > 0, TOTAL_DEPTH > 0)

# Revisión de valores extremos
summary(datos_log$GROUND_ELEVATION)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   86.87  180.44  184.30  185.46  190.78  209.45
# --- 3. Modelo Logarítmico ---
modelo_log <- lm(TOTAL_DEPTH ~ log(GROUND_ELEVATION), data = datos_log)

a <- coef(modelo_log)[1]
b <- coef(modelo_log)[2]
r2 <- summary(modelo_log)$r.squared

cat("Ecuación: TOTAL_DEPTH =", round(a, 2), "+", round(b, 2), "* ln(ELEVATION)\n")
## Ecuación: TOTAL_DEPTH = 3480.76 + -637.79 * ln(ELEVATION)
cat("R² =", round(r2, 4), "\n")
## R² = 0.8842
# --- 4. Datos para la curva ---
puntos_prediccion <- data.frame(
  GROUND_ELEVATION = seq(
    min(datos_log$GROUND_ELEVATION),
    max(datos_log$GROUND_ELEVATION),
    length.out = 300
  )
)

puntos_prediccion$TOTAL_DEPTH <- predict(modelo_log, newdata = puntos_prediccion)

# --- 5. Graficar ---
ggplot(datos_log, aes(x = GROUND_ELEVATION, y = TOTAL_DEPTH)) +
  geom_point(color = "darkgreen", alpha = 0.6, size = 3) +
  geom_line(data = puntos_prediccion, aes(x = GROUND_ELEVATION, y = TOTAL_DEPTH),
            color = "blue", size = 1.5) +
  labs(
    title = "Regresión Logarítmica (Lincoln - Gas Privado)",
    subtitle = paste("R² =", round(r2, 3)),
    x = "Elevación del Terreno (m)",
    y = "Profundidad del Pozo (m)"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

r2_log <- summary(modelo_log)$r.squared
cat("R² Logarítmico =", round(r2_log, 4), "\n")
## R² Logarítmico = 0.8842