# CARGA DE DATOS
library(dplyr)
library(knitr)
library(gt)
library(scatterplot3d)
library(plotly)
setwd("C:/Users/HP/Documents/PROYECTO ESTADISTICA/RStudio")
datos <- read.csv("tablap.csv", header = TRUE, sep = ";", dec = ",")
Definición de Variables y asignacion de nombres y, x1, x2
options(scipen = 999)
volumen <- as.numeric(datos$Volume.excavated.soil..slope)
acero <- as.numeric(datos$Mass.of.steel..pad.)
concreto <- as.numeric(datos$Mass.of.concrete..pad.)
TPV <- data.frame(y = volumen, x1 = acero, x2 = concreto)
TPV <- na.omit(TPV)
TPV <- TPV[TPV$y > 0 & TPV$x1 > 0 & TPV$x2 > 0, ]
# Filtrar outliers
filtro_iqr <- function(v){
Q1 <- quantile(v, 0.25)
Q3 <- quantile(v, 0.75)
IQRv <- Q3 - Q1
li <- Q1 - 1 * IQRv
ls <- Q3 + 1 * IQRv
return(v >= li & v <= ls)
}
filas_buenas <- filtro_iqr(TPV$y) & filtro_iqr(TPV$x1) & filtro_iqr(TPV$x2)
TPV_filtrado <- TPV[filas_buenas, ]
outliers_detectados <- total_antes_outliers - nrow(TPV_filtrado)
outliers_detectados
## [1] 93
TPV <- TPV_filtrado
| Tabla Nº1. Distribución de Frecuencias | |||
| N° | Volumen (y) | Masa Acero (x1) | Masa Concreto (x2) |
|---|---|---|---|
| 1 | 17,503.00 | 45,145.00 | 97,716.00 |
| 2 | 15,377.00 | 57,067.00 | 24,230.00 |
| 3 | 18,795.00 | 97,792.00 | 95,205.00 |
| 4 | 12,289.00 | 11,952.00 | 41,747.00 |
| 5 | 14,959.00 | 45,026.00 | 71,104.00 |
| 6 | 18,252.00 | 65,894.00 | 94,074.00 |
| 7 | 16,002.00 | 67,621.00 | 53,348.00 |
| 8 | 11,634.00 | 10,242.00 | 23,864.00 |
| 9 | 13,639.00 | 21,904.00 | 76,071.00 |
| 10 | 15,618.00 | 61,255.00 | 36,911.00 |
| 11 | 15,090.00 | 74,581.00 | 23,561.00 |
| 12 | 16,822.00 | 84,608.00 | 38,160.00 |
| 13 | 15,577.00 | 61,943.00 | 20,645.00 |
| 14 | 15,564.00 | 79,323.00 | 20,660.00 |
| 15 | 14,457.00 | 53,579.00 | 24,869.00 |
| 16 | 14,764.00 | 72,566.00 | 12,436.00 |
| 17 | 15,566.00 | 92,049.00 | 19,066.00 |
| 18 | 18,479.00 | 69,135.00 | 78,769.00 |
| 19 | 13,102.00 | 49,074.00 | 13,945.00 |
| 20 | 13,155.00 | 39,901.00 | 30,322.00 |
| Tabla 1 de 2 | |||
par(mar = c(5, 6, 4, 2))
Cobrereg <- scatterplot3d(x1, x2, y, angle = 225, pch = 16, color = "blue",
cex.symbols = 0.6,
main = "Gráfica N°1: Diagrama de dispersión de Volumen, Acero
y Concreto",
xlab = "Masa de Acero (Kg)",
ylab = "Masa de Concreto (Kg)",
zlab = "Volumen (m^3)",
y.margin.add = 0.9,
las = 1,
xlim = c(0, max_x1),
ylim = c(0, max_x2),
zlim = c(0, max_y + 5))
Calculo de Parámetros
#CALCULO DE PARAMETROS
regresion_multiple <- lm(y ~ x1 + x2)
a_val <- round(coef(regresion_multiple)[1], 3)
a_val
## (Intercept)
## 10056.76
b_val <- round(coef(regresion_multiple)[2], 5)
b_val
## x1
## 0.05941
c_val <- round(coef(regresion_multiple)[3], 5)
c_val
## x2
## 0.0395
Diagrama de Dispersión
par(mar = c(5, 6, 4, 2))
Cobrereg <- scatterplot3d(x1, x2, y, angle = 225, pch = 16, color = "blue",
cex.symbols = 0.6,
main = "Gráfica N°2: Comparacion de la realidad
con el modelo multivariable lineal de la Masa de acero,
Masa de concreto y Volumen",
xlab = "Masa de Acero (Kg)",
ylab = "Masa de Concreto (Kg)",
zlab = "Volumen (m^3)",
y.margin.add = 0.5,
las = 1,
xlim = c(0, max_x1),
ylim = c(0, max_x2),
zlim = c(0, max_y + 5))
Cobrereg$plane3d(regresion_multiple, col = "red", lwd = 1.5)
Visualización Interactiva del Modelo
# 1. Ajuste del modelo con la muestra limpia
regresion_multiple <- lm(y ~ x1 + x2, data = TPV)
# 2. Secuencias basadas estrictamente en la mitad filtrada
x1_seq <- seq(min(TPV$x1, na.rm = TRUE), max(TPV$x1, na.rm = TRUE), length.out = 20)
x2_seq <- seq(min(TPV$x2, na.rm = TRUE), max(TPV$x2, na.rm = TRUE), length.out = 20)
# 3. Malla de predicción
grid <- expand.grid(x1 = x1_seq, x2 = x2_seq)
grid$y_pred <- predict(regresion_multiple, newdata = grid)
df_puntos <- data.frame(x1 = TPV$x1, x2 = TPV$x2, y = TPV$y)
z_matrix <- matrix(grid$y_pred, nrow = 20, ncol = 20, byrow = TRUE)
# 4. Renderizado con ejes fuertemente remarcados
plot_ly() %>%
add_markers(
data = df_puntos,
x = ~x1,
y = ~x2,
z = ~y,
marker = list(color = "skyblue", size = 3),
name = "Datos reales (50%)"
) %>%
add_surface(
x = ~x1_seq,
y = ~x2_seq,
z = ~z_matrix,
opacity = 0.6,
colorscale = "Reds",
name = "Plano de regresión"
) %>%
layout(
title = "Modelo de Regresión Múltiple 3D - Interactiva",
scene = list(
xaxis = list(
title = "Masa de Acero (Kg)",
showline = TRUE,
linecolor = "black",
linewidth = 3,
mirror = TRUE,
gridcolor = "lightgray"
),
yaxis = list(
title = "Masa de Concreto (Kg)",
showline = TRUE,
linecolor = "black",
linewidth = 3,
mirror = TRUE,
gridcolor = "lightgray"
),
zaxis = list(
title = "Volumen (m^3)",
showline = TRUE,
linecolor = "black",
linewidth = 3,
mirror = TRUE,
gridcolor = "lightgray"
)
)
)
plot.new()
par(mar = c(1, 1, 1, 1))
plot.window(xlim = c(0, 100), ylim = c(0, 100))
rect(5, 15, 95, 85, border = "blue", lwd = 3)
text(50, 70, "Ecuación Múltiple Lineal", cex = 1.4, font = 2, col = "blue")
text(50, 50, "Y = a + bx1 + cx2", cex = 1.2, font = 1, col = "black")
text(50, 30, paste0("Y = ", a_val, " + (", b_val, ")x1 + (", c_val, ")x2"), cex = 1.1, font = 2, col = "darkgreen")
Coeficiente de Determinacion
r2 <- summary(regresion_multiple)$r.squared
r2
## [1] 0.9295343
Test de Pearson
r_multiple <- sqrt(r2)
r_multiple
## [1] 0.9641236
tabla_tests <- data.frame(
Indicador = c("Test de Pearson (r)", "coeficiente de Determinación (r^2)"),
Valor = c(paste0(round(r_multiple * 100, 2), " %"), paste0(round(r2 * 100, 2), " %"))
)
# Imprimir la tabla de indicadores
tabla_tests %>%
gt() %>%
tab_header(title = md("**Tests de Aprobación del Modelo Multivariable**")) %>%
tab_style(
style = list(cell_fill(color = "lightgray"), cell_text(weight = "bold")),
locations = cells_title(groups = "title")
) %>%
cols_align(align = "center") %>%
tab_options(table.width = pct(65), column_labels.font.weight = "bold") %>%
tab_source_note(
source_note = md("**Tabla 2 de 2**")
)
| Tests de Aprobación del Modelo Multivariable | |
| Indicador | Valor |
|---|---|
| Test de Pearson (r) | 96.41 % |
| coeficiente de Determinación (r^2) | 92.95 % |
| Tabla 2 de 2 | |
plot.new()
plot.window(xlim = c(0, 100), ylim = c(0, 100))
text(50, 85, "RESTRICCIONES DEL MODELO", cex = 1.4, font = 2, col = "#D9534F")
parrafo_1 <- "La interpretación del modelo se limita estrictamente
al rango observado de variables.Se deben evitar
extrapolaciones fuera del Rango analizado de
Masa de Acero y Concreto."
text(50, 55, parrafo_1, cex = 1.1, font = 3, col = "black")
rect(2, 5, 98, 95, border = "#D9534F", lwd = 3)
x1_test <- 60000 # Valor de Acero
x2_test <- 75000 # Valor de Concreto
C_Est <- a_val + (b_val * x1_test) + (c_val * x2_test)
plot.new()
plot.window(xlim = c(0, 100), ylim = c(0, 1))
rect(10, 0.4, 90, 0.6, col = "#E5E7E9", border = NA)
text(50, 0.85, "PRONÓSTICO DEL MODELO MULTIVARIABLE", cex = 1.5, font = 2, col = "#2A9D8F")
texto_pregunta <- paste0("
¿Qué volumen se espera con una masa de acero
de ", x1_test, " y una masa de concreto de ", x2_test, "?")
text(35, 0.75, texto_pregunta, cex = 1.3, font = 3)
text(50, 0.5, paste0("R = ", round(C_Est, 3), " m3"), cex = 1.6, font = 2, col = "#1F618D")
rect(10, 0.4, 90, 0.6, border = "#2A9D8F", lwd = 2)
Entre el volumen y la combinación de Acero y Concreto existe una regresión múltiple lineal. El volumen está influenciado en un 92.95% por la combinación de estas variables.
Por ejemplo: Para una masa de acero de 60000 y una masa de concreto de 75000, se espera un volumen de 16583.863 m3.