# =========================================================
# 🔹 MODELO DE REGRESIÓN MÚLTIPLE 3D - PETRÓLEO ONTARIO 🔹
# =========================================================

# --- 1. LIBRERÍAS ---
library(scatterplot3d)

# --- 2. DATA LOADING ---
# Change into the data directory and read the semicolon-separated CSV
# (header row present, "." as decimal mark, short rows padded via fill).
# NOTE(review): setwd() changes the working directory for everything that
# runs after this point in the session.
setwd("/cloud/project/datos")
datos <- read.csv(
  "Petroleo_Ontaro.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  fill = TRUE
)

# --- 3. NUMERIC CONVERSION ---
# Turn a column into numbers: take its text form, swap every "," for ".",
# then coerce. Entries that still cannot be parsed become NA.
a_numerico <- function(valores) {
  texto <- as.character(valores)
  as.numeric(gsub(",", ".", texto))
}

datos$TOTAL_DEPTH <- a_numerico(datos$TOTAL_DEPTH)
datos$GROUND_ELEVATION <- a_numerico(datos$GROUND_ELEVATION)
datos$TRUE_VERTICAL_DEPTH <- a_numerico(datos$TRUE_VERTICAL_DEPTH)

# --- 4. KEEP ONLY VALID ROWS ---
# A row is dropped when any of the three model variables is missing.
tiene_faltantes <- is.na(datos$TOTAL_DEPTH) |
  is.na(datos$GROUND_ELEVATION) |
  is.na(datos$TRUE_VERTICAL_DEPTH)
datos_filtrados <- datos[!tiene_faltantes, , drop = FALSE]

# --- 5. REMOVE OUTLIERS USING THE IQR RULE ---
# Flag which values of a vector lie inside the interquartile-range fences.
#
# A value is kept when it falls within [Q1 - k * IQR, Q3 + k * IQR], where
# Q1 and Q3 are the 25th and 75th percentiles of `x`, computed with missing
# values removed.
#
# Args:
#   x: Vector of values to screen.
#   k: Non-negative fence multiplier (single number). Defaults to 1.5.
#
# Returns:
#   An unnamed logical vector of the same length as `x`: TRUE for values
#   inside the fences, FALSE for values outside them and for missing values.
eliminar_outliers <- function(x, k = 1.5) {
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 0)

  # names = FALSE keeps the "25%"/"75%" labels out of the returned mask.
  cuartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  rango_iq <- cuartiles[2] - cuartiles[1]
  limite_inferior <- cuartiles[1] - k * rango_iq
  limite_superior <- cuartiles[2] + k * rango_iq

  # Comparing NA yields NA, and an NA inside a logical row index produces
  # all-NA rows; map missing values to FALSE so the mask is safe to index with.
  !is.na(x) & x >= limite_inferior & x <= limite_superior
}

# Screen each model variable separately, then keep only the rows that pass
# the outlier check on all three.
dentro_profundidad <- eliminar_outliers(datos_filtrados$TOTAL_DEPTH)
dentro_elevacion <- eliminar_outliers(datos_filtrados$GROUND_ELEVATION)
dentro_vertical <- eliminar_outliers(datos_filtrados$TRUE_VERTICAL_DEPTH)
sin_outliers <- dentro_profundidad & dentro_elevacion & dentro_vertical

datos_limpios <- datos_filtrados[sin_outliers, ]

# --- 6. MULTIPLE REGRESSION ---
# Modelling frame: response y = TOTAL_DEPTH, predictors
# x1 = GROUND_ELEVATION and x2 = TRUE_VERTICAL_DEPTH.
df_reg <- with(
  datos_limpios,
  data.frame(y = TOTAL_DEPTH, x1 = GROUND_ELEVATION, x2 = TRUE_VERTICAL_DEPTH)
)

# Linear model of y on both predictors; summary() shows the coefficient
# table and the fit statistics.
modelo_multi <- lm(y ~ x1 + x2, data = df_reg)
summary(modelo_multi)
## 
## Call:
## lm(formula = y ~ x1 + x2, data = df_reg)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -64.42  -0.50  -0.30  -0.10 534.81 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  1.8841424  0.7580924    2.485  0.01295 *  
## x1          -0.0097176  0.0037178   -2.614  0.00896 ** 
## x2           1.0010477  0.0003471 2883.831  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.636 on 21225 degrees of freedom
## Multiple R-squared:  0.9975, Adjusted R-squared:  0.9975 
## F-statistic: 4.228e+06 on 2 and 21225 DF,  p-value: < 2.2e-16
# --- 7. 3D SCATTER PLOT WITH REGRESSION PLANE ---
# Plot the observations. The returned object provides xyz.convert(), which
# maps 3D coordinates to 2D device coordinates for the overlay drawn below.
scatter3d <- scatterplot3d(df_reg$x1, df_reg$x2, df_reg$y,
                           pch = 16, color = "steelblue",
                           xlab = "Elevación del terreno (m)",
                           ylab = "Profundidad Vertical Verdadera (m)",
                           zlab = "Profundidad Total (m)",
                           main = "Modelo de Regresión Múltiple 3D",
                           angle = 135)

# --- 8. REGRESSION PLANE ---
# Number of mesh nodes along each predictor axis. Every index computation
# below derives from this value, so the resolution is changed in one place.
n_malla <- 20L

x1_seq <- seq(min(df_reg$x1), max(df_reg$x1), length.out = n_malla)
x2_seq <- seq(min(df_reg$x2), max(df_reg$x2), length.out = n_malla)
grid <- expand.grid(x1 = x1_seq, x2 = x2_seq)

# Fitted response at every mesh node, projected onto the plotting device.
grid$y_pred <- predict(modelo_multi, newdata = grid)
coords <- scatter3d$xyz.convert(grid$x1, grid$x2, grid$y_pred)

# expand.grid() varies its first factor fastest, so filling a matrix
# column-wise puts x1 along the rows and x2 along the columns.
malla_x <- matrix(coords$x, nrow = n_malla)
malla_y <- matrix(coords$y, nrow = n_malla)

# Mesh lines with x2 held fixed (one per column) ...
for (columna in seq_len(n_malla)) {
  lines(malla_x[, columna], malla_y[, columna], col = "red", lwd = 2)
}
# ... and with x1 held fixed (one per row).
for (fila in seq_len(n_malla)) {
  lines(malla_x[fila, ], malla_y[fila, ], col = "red", lwd = 2)
}

# --- 9. R² ---
# Build the model summary once and read both goodness-of-fit figures from
# that single object instead of recomputing it per statistic.
resumen_modelo <- summary(modelo_multi)
r2 <- resumen_modelo$r.squared
r2_ajustado <- resumen_modelo$adj.r.squared

# The "##" lines are the output recorded from a previous run.
cat("🔹 R²:", round(r2, 4), "\n")
## 🔹 R²: 0.9975
cat("🔹 R² ajustado:", round(r2_ajustado, 4), "\n")
## 🔹 R² ajustado: 0.9975
# --- 10. EXAMPLE PREDICTION ---
# Predict the response for one new observation (x1 = 200, x2 = 250).
nueva_obs <- data.frame(x1 = 200, x2 = 250)
y_predicha <- predict(modelo_multi, newdata = nueva_obs)

# The predictor values shown in the message are read from nueva_obs so the
# text cannot drift from the inputs actually used. The "##" lines are the
# output recorded from a previous run.
cat("\n🔮 Profundidad estimada (TOTAL_DEPTH) para Elevación=", nueva_obs$x1,
    " y Vertical=", nueva_obs$x2, ": ", round(y_predicha, 2), " m\n",
    sep = "")
## 
## 🔮 Profundidad estimada (TOTAL_DEPTH) para Elevación=200 y Vertical=250: 250.2 m
# Conclusion
# Closing interpretation of the model, stored as a single string. The pieces
# are joined with single spaces by paste().
conclusion <- paste(
  "La regresión múltiple muestra que tanto la elevación del terreno como la",
  "profundidad vertical influyen significativamente en la profundidad total.",
  "El modelo presenta un excelente ajuste (R² = 0.9975) y permite estimar con",
  "alta precisión la profundidad total a partir de estas variables."
)