CARGAR DATOS
setwd("/cloud/project")
datos<-read.csv("database.csv", header =TRUE,sep=",",dec=".")
CARGAR LIBRERIAS Y PACKETES
library(e1071)
library(PASWR)
## Loading required package: lattice
library(SmartEDA)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Dibujar curva ajustada
MODELO N5 (POLINÓMICA) SIN DEPURAR
Diagrama de dispersion
plot(datos$Unadjusted.City.MPG..FT1.,datos$Unadjusted.Highway.MPG..FT1.)

x<-datos$Unadjusted.City.MPG..FT1.
y<-datos$Unadjusted.Highway.MPG..FT1.
plot(x,y)

xcuad<-x^2
xcub<-x^3
xcta<-x^4
Circulo de la regresión
regresionPolinomica<-lm(y~x+xcuad+xcub+xcta,na.action = na.omit)
regresionPolinomica
##
## Call:
## lm(formula = y ~ x + xcuad + xcub + xcta, na.action = na.omit)
##
## Coefficients:
## (Intercept) x xcuad xcub xcta
## -5.590e+00 2.298e+00 -2.735e-02 1.903e-04 -4.562e-07
summary(regresionPolinomica)
##
## Call:
## lm(formula = y ~ x + xcuad + xcub + xcta, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.866 -1.716 0.087 1.973 56.380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.590e+00 1.602e-01 -34.90 <2e-16 ***
## x 2.298e+00 1.458e-02 157.59 <2e-16 ***
## xcuad -2.735e-02 4.063e-04 -67.30 <2e-16 ***
## xcub 1.903e-04 3.683e-06 51.67 <2e-16 ***
## xcta -4.562e-07 1.035e-08 -44.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.055 on 38108 degrees of freedom
## Multiple R-squared: 0.9109, Adjusted R-squared: 0.9109
## F-statistic: 9.741e+04 on 4 and 38108 DF, p-value: < 2.2e-16
GRÁFICA REGRESION POLINÓMICA
plot(x,y,col=5,pch=7,main = "Regresión Polinómica",
xlab="Consumo del Combutible Primario en Ciudad (MPG) Sin ajustar",ylab="Consumo del Combutible Primario en Carretera (MPG) Sin ajustar",
xlim =c(0,224.8),ylim =c(0,182.7))
curve(a+(b*x)+(c*x^2)+(d*x^3)+(e*x^4),add=T,col="blue")

Test de Bondad
R <- cor(x, y, use = "complete.obs")
R
## [1] 0.9214694
REGRESIÓN POLINÓMICA DE 5° GRADO - DEPURADO
Cargar y limpiar datos
x <- datos$Unadjusted.City.MPG..FT1.
y <- datos$Unadjusted.Highway.MPG..FT1.
datos <- na.omit(data.frame(x = x, y = y))
Agregar términos polinómicos hasta grado 5
datos$x2 <- datos$x^2
datos$x3 <- datos$x^3
datos$x4 <- datos$x^4
datos$x5 <- datos$x^5
Ajustar modelo polinómico de grado 5
modelo <- lm(y ~ x + x2 + x3 + x4 + x5, data = datos)
Coeficientes
coef <- coef(modelo)
a <- coef[1]; b <- coef[2]; c <- coef[3]; d <- coef[4]; e <- coef[5]; f <- coef[6]
Test de Bondad
R <- cor(datos$x, datos$y)
Secuencia para curva
x_seq <- seq(min(datos$x), max(datos$x), length.out = 400)
y_pred <- a + b*x_seq + c*x_seq^2 + d*x_seq^3 + e*x_seq^4 + f*x_seq^5
Graficar puntos
plot(datos$x, datos$y,
col = rgb(0, 1, 1, 0.4), pch = 0, cex = 0.8,
main = "Regresión Polinómica",
xlab = "Consumo del Combustible Primario en Ciudad (MPG) SIN ajustar",
ylab = "Consumo del Combustible Primario en Carretera (MPG) SIN ajustar",
xlim = c(0, max(datos$x)*1.05),
ylim = c(0, max(datos$y)*1.05))
lines(x_seq, y_pred, col = "blue", lwd = 2)
legend("topleft",
legend = paste0("R = ", round(R, 4)),
col = "blue", lwd = 2, bty = "n", cex = 0.9)

Dibujar curva ajustada
MODELO 3D
Cargar los datos
setwd("/cloud/project")
datos<-read.csv("database.csv", header =TRUE,sep=",",dec=".")
Suponiendo que datos ya existe y no tiene NAs, o los reemplazaste
así:
datos$Annual.Fuel.Cost..FT1.[is.na(datos$Annual.Fuel.Cost..FT1.)] <- 1970.67
datos$Engine.Displacement[is.na(datos$Engine.Displacement)] <- 3.317
datos$Combined.MPG..FT1.[is.na(datos$Combined.MPG..FT1.)] <- 20.216
x <- datos$Annual.Fuel.Cost..FT1.
y <- datos$Engine.Displacement
z <- datos$Combined.MPG..FT1.
Ajustar modelo lineal múltiple
modelo <- lm(x ~ y + z)
Calcular R y R2 del modelo múltiple
R <- sqrt(summary(modelo)$r.squared)
Crear rejilla para plano de regresión
y_seq <- seq(min(y), max(y), length.out = 30)
z_seq <- seq(min(z), max(z), length.out = 30)
grid <- expand.grid(y = y_seq, z = z_seq)
grid$pred <- predict(modelo, newdata = grid)
Crear texto con R y R2 para la leyenda
texto_leyenda <- paste0("R = ", round(R, 4))
Graficar con plotly incluyendo la leyenda
plot_ly() %>%
add_markers(x = y, y = z, z = x, marker = list(color = 'blue'), name = "Datos") %>%
add_surface(x = y_seq, y = z_seq, z = matrix(grid$pred, nrow=30), opacity = 0.5, name = "Plano de Regresión") %>%
layout(scene = list(
xaxis = list(title = "Volúmen del motor (L)"),
yaxis = list(title = "Consumo de combustible combinado(MPG)"),
zaxis = list(title = "Costo Anual del combustible primario($)"),
annotations = list(
list(
x = min(y_seq), y = max(z_seq), z = max(x),
text = texto_leyenda,
showarrow = FALSE,
font = list(color = "black", size = 14),
bgcolor = 'rgba(255, 255, 255, 0.7)',
xanchor = "left",
yanchor = "top"
)
)
))
MODELO DEPURADO 3D
Ajustar modelo polinomial cuadrático
modelo_poly <- lm(x ~ y + z + I(y^2) + I(z^2) + I(y*z))
Test de Bondad
R_poly <- sqrt(summary(modelo_poly)$r.squared)
R2_poly <- summary(modelo_poly)$r.squared * 100
Crear rejilla para plano de regresión
y_seq <- seq(min(y), max(y), length.out = 30)
z_seq <- seq(min(z), max(z), length.out = 30)
grid <- expand.grid(y = y_seq, z = z_seq)
Predecir
grid$pred <- predict(modelo_poly, newdata = grid)
Recortar predicciones para mantener rango [500, 6050]
grid$pred <- pmax(pmin(grid$pred, 6050), 500)
Crear texto con R y R2
texto_leyenda_poly <- paste0("R = ", round(R_poly, 4), "\nR² = ", round(R2_poly, 2), "%")
Graficar con plotly
library(plotly)
plot_ly() %>%
add_markers(x = y, y = z, z = x, marker = list(color = 'blue'), name = "Datos") %>%
add_surface(x = y_seq, y = z_seq, z = matrix(grid$pred, nrow=30), opacity = 0.5, name = "Plano Polinomial") %>%
layout(scene = list(
xaxis = list(title = "Volúmen del motor (L)"),
yaxis = list(title = "Consumo de combustible combinado(MPG)"),
zaxis = list(title = "Costo Anual del combustible primario($)"),
annotations = list(
list(
x = min(y_seq), y = max(z_seq), z = max(x),
text = texto_leyenda_poly,
showarrow = FALSE,
font = list(color = "black", size = 14),
bgcolor = 'rgba(255, 255, 255, 0.7)',
xanchor = "left",
yanchor = "top"
)
)
))