# =========================================================
# 🔹 MODELO DE REGRESIÓN MÚLTIPLE 3D - PETRÓLEO ONTARIO 🔹
# =========================================================

# --- 1. LIBRERÍAS ---
library(scatterplot3d)

# --- 2. DATA LOADING ---
# The file is addressed through an explicit path instead of setwd(), so the
# script no longer changes the session's working directory as a side effect.
dir_datos <- "/cloud/project/datos"
ruta_datos <- file.path(dir_datos, "Petroleo_Ontaro.csv")

# Fail early with a clear message rather than with read.csv's generic
# connection error.
if (!file.exists(ruta_datos)) {
  stop("No se encontró el archivo de datos: ", ruta_datos, call. = FALSE)
}

# Semicolon-separated file with a header row; fill = TRUE pads rows that have
# fewer fields than the rest.
datos <- read.csv(ruta_datos, header = TRUE, sep = ";", dec = ".", fill = TRUE)

# --- 3. NUMERIC CONVERSION ---
# For each depth/elevation column: turn the values into text, swap decimal
# commas for dots and coerce to numeric. Text that still cannot be parsed
# becomes NA (as.numeric warns when that happens).
columnas_numericas <- c("TOTAL_DEPTH", "GROUND_ELEVATION", "TRUE_VERTICAL_DEPTH")
datos[columnas_numericas] <- lapply(
  datos[columnas_numericas],
  function(valores) as.numeric(gsub(",", ".", as.character(valores)))
)

# --- 4. KEEP ONLY VALID ROWS ---
# A row is dropped as soon as any of the three model variables is missing.
fila_incompleta <- is.na(datos$TOTAL_DEPTH) |
  is.na(datos$GROUND_ELEVATION) |
  is.na(datos$TRUE_VERTICAL_DEPTH)

datos_filtrados <- datos[!fila_incompleta, , drop = FALSE]

# --- 5. REMOVE OUTLIERS USING THE IQR RULE ---

#' Flag the values that lie inside the IQR fences
#'
#' A value is kept when it falls within
#' [Q1 - k * IQR, Q3 + k * IQR], where Q1 and Q3 are the first and third
#' quartiles of `x` computed with missing values removed.
#'
#' @param x Vector of values to screen.
#' @param k Non-negative fence multiplier; the default 1.5 is the value the
#'   function used before it became configurable.
#' @return A logical vector the same length as `x`: TRUE for values inside the
#'   fences, FALSE for values outside them. Missing values are reported as
#'   FALSE (never NA), so the result can be used directly for row subsetting
#'   without producing all-NA rows.
eliminar_outliers <- function(x, k = 1.5) {
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 0)

  # names = FALSE keeps the "25%"/"75%" labels out of the returned mask.
  cuartiles <- stats::quantile(
    x,
    probs = c(0.25, 0.75),
    na.rm = TRUE,
    names = FALSE
  )
  rango_iq <- cuartiles[2] - cuartiles[1]
  limite_inferior <- cuartiles[1] - k * rango_iq
  limite_superior <- cuartiles[2] + k * rango_iq

  dentro <- (x >= limite_inferior) & (x <= limite_superior)

  # Comparisons against NA give NA; turn those into FALSE.
  dentro & !is.na(dentro)
}

# Build one inlier mask per model variable and combine them: a row survives
# only when it is inside the fences for all three variables.
columnas_modelo <- c("TOTAL_DEPTH", "GROUND_ELEVATION", "TRUE_VERTICAL_DEPTH")
mascaras <- lapply(
  columnas_modelo,
  function(columna) eliminar_outliers(datos_filtrados[[columna]])
)
sin_outliers <- Reduce(`&`, mascaras)

datos_limpios <- datos_filtrados[sin_outliers, ]

# --- 6. MULTIPLE REGRESSION ---
# Compact modelling frame: y is the response (TOTAL_DEPTH); x1
# (GROUND_ELEVATION) and x2 (TRUE_VERTICAL_DEPTH) are the predictors.
df_reg <- with(
  datos_limpios,
  data.frame(
    y = TOTAL_DEPTH,
    x1 = GROUND_ELEVATION,
    x2 = TRUE_VERTICAL_DEPTH
  )
)

# Linear model of y on both predictors; the summary is printed at top level.
modelo_multi <- lm(y ~ x1 + x2, data = df_reg)
summary(modelo_multi)
## 
## Call:
## lm(formula = y ~ x1 + x2, data = df_reg)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -64.42  -0.50  -0.30  -0.10 534.81 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  1.8841424  0.7580924    2.485  0.01295 *  
## x1          -0.0097176  0.0037178   -2.614  0.00896 ** 
## x2           1.0010477  0.0003471 2883.831  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.636 on 21225 degrees of freedom
## Multiple R-squared:  0.9975, Adjusted R-squared:  0.9975 
## F-statistic: 4.228e+06 on 2 and 21225 DF,  p-value: < 2.2e-16
# --- 7. 3D SCATTER PLOT ---
# The value returned by scatterplot3d() is kept because its xyz.convert()
# helper is needed afterwards to project 3D coordinates onto this 2D plot.
scatter3d <- with(
  df_reg,
  scatterplot3d(
    x = x1,
    y = x2,
    z = y,
    color = "steelblue",
    pch = 16,
    main = "Modelo de Regresión Múltiple 3D",
    xlab = "Elevación del terreno (m)",
    ylab = "Profundidad Vertical Verdadera (m)",
    zlab = "Profundidad Total (m)",
    angle = 135
  )
)

# --- 8. REGRESSION PLANE ---
# Evaluate the fitted model on a regular mesh that spans the observed range of
# both predictors.
n_malla <- 20
x1_seq <- seq(min(df_reg$x1), max(df_reg$x1), length.out = n_malla)
x2_seq <- seq(min(df_reg$x2), max(df_reg$x2), length.out = n_malla)
grid <- expand.grid(x1_seq, x2_seq)
names(grid) <- c("x1", "x2")

grid$y_pred <- predict(modelo_multi, newdata = grid)

# Project the mesh nodes onto the 2D coordinates of the existing 3D plot.
coords <- scatter3d$xyz.convert(grid$x1, grid$x2, grid$y_pred)

# expand.grid() varies x1 fastest, so when the row numbers of grid are laid
# out column-wise, each column of this matrix is a run of nodes sharing one x2
# value and each row is a run of nodes sharing one x1 value.
nodos <- matrix(seq_len(nrow(grid)), nrow = n_malla)

# Mesh lines along x1 (one per x2 value).
for (columna in seq_len(ncol(nodos))) {
  tramo <- nodos[, columna]
  lines(coords$x[tramo], coords$y[tramo], col = "red", lwd = 2)
}
# Mesh lines along x2 (one per x1 value).
for (fila in seq_len(nrow(nodos))) {
  tramo <- nodos[fila, ]
  lines(coords$x[tramo], coords$y[tramo], col = "red", lwd = 2)
}



# --- 9. R² ---
# Summarise the fitted model once and read both goodness-of-fit figures from
# that single summary object.
resumen_modelo <- summary(modelo_multi)
r2 <- resumen_modelo$r.squared
r2_ajustado <- resumen_modelo$adj.r.squared

cat("🔹 R²:", round(r2, digits = 4), "\n")
## 🔹 R²: 0.9975
cat("🔹 R² ajustado:", round(r2_ajustado, digits = 4), "\n")
## 🔹 R² ajustado: 0.9975
# --- 10. EXAMPLE PREDICTION ---
# One hypothetical observation: x1 is the ground elevation and x2 the true
# vertical depth, matching the predictor names used to fit the model.
nueva_obs <- data.frame(x1 = 200, x2 = 250)
y_predicha <- predict(modelo_multi, newdata = nueva_obs)

# The inputs shown in the message are read from nueva_obs instead of being
# typed a second time, so the text cannot drift from the values that were
# actually predicted. With sep = "" the spaces are written explicitly, which
# reproduces the previous output exactly.
cat(
  "\n🔮 Profundidad estimada (TOTAL_DEPTH) para Elevación=", nueva_obs$x1,
  " y Vertical=", nueva_obs$x2, ": ",
  round(y_predicha, 2), " m\n",
  sep = ""
)
## 
## 🔮 Profundidad estimada (TOTAL_DEPTH) para Elevación=200 y Vertical=250: 250.2 m
# Install plotly only when it is not already available, so that re-running
# the script does not download and reinstall the package every time; then
# attach it for the interactive figure below.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

# Interactive version of the figure: observed points plus the fitted surface.
# grid is reshaped column-wise into three 20 x 20 matrices, so the cell at a
# given position of each matrix comes from the same row of grid.
lado_malla <- 20
malla_x1 <- matrix(grid$x1, nrow = lado_malla, ncol = lado_malla)
malla_x2 <- matrix(grid$x2, nrow = lado_malla, ncol = lado_malla)
malla_y <- matrix(grid$y_pred, nrow = lado_malla, ncol = lado_malla)

# Axis titles of the 3D scene.
escena <- list(
  xaxis = list(title = "Elevación del terreno (m)"),
  yaxis = list(title = "Profundidad Vertical Verdadera (m)"),
  zaxis = list(title = "Profundidad Total (m)")
)

figura <- plot_ly() %>%
  # Observed data points
  add_markers(
    data = df_reg,
    x = ~x1,
    y = ~x2,
    z = ~y,
    marker = list(color = "steelblue", size = 3),
    name = "Datos reales"
  ) %>%
  # Fitted regression surface
  add_surface(
    x = malla_x1,
    y = malla_x2,
    z = malla_y,
    opacity = 0.6,
    colorscale = "Reds",
    name = "Plano de regresión"
  ) %>%
  layout(
    title = "Modelo de Regresión Múltiple 3D - Plotly",
    scene = escena
  )

# Evaluating the object at top level displays the figure.
figura
# Conclusion
# Written summary of the analysis, stored as one string. It is assembled from
# shorter pieces only to keep the source lines readable; the resulting text is
# a single uninterrupted paragraph.
conclusion <- paste0(
  "La regresión múltiple muestra que tanto la elevación del terreno como ",
  "la profundidad vertical influyen significativamente en la profundidad ",
  "total. El modelo presenta un excelente ajuste (R² = 0.9975) y permite ",
  "estimar con alta precisión la profundidad total a partir de estas ",
  "variables."
)