CARGAR DATOS

setwd("/cloud/project")
datos<-read.csv("database.csv", header =TRUE,sep=",",dec=".")

CARGAR LIBRERIAS Y PACKETES

library(e1071)
library(PASWR)
## Loading required package: lattice
library(SmartEDA)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Dibujar curva ajustada

MODELO N5 (POLINÓMICA) SIN DEPURAR

Diagrama de dispersion

plot(datos$Unadjusted.City.MPG..FT1.,datos$Unadjusted.Highway.MPG..FT1.)

x<-datos$Unadjusted.City.MPG..FT1.
y<-datos$Unadjusted.Highway.MPG..FT1.
plot(x,y)

xcuad<-x^2
xcub<-x^3
xcta<-x^4

Circulo de la regresión

regresionPolinomica<-lm(y~x+xcuad+xcub+xcta,na.action = na.omit)
regresionPolinomica
## 
## Call:
## lm(formula = y ~ x + xcuad + xcub + xcta, na.action = na.omit)
## 
## Coefficients:
## (Intercept)            x        xcuad         xcub         xcta  
##  -5.590e+00    2.298e+00   -2.735e-02    1.903e-04   -4.562e-07
summary(regresionPolinomica)
## 
## Call:
## lm(formula = y ~ x + xcuad + xcub + xcta, na.action = na.omit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.866  -1.716   0.087   1.973  56.380 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.590e+00  1.602e-01  -34.90   <2e-16 ***
## x            2.298e+00  1.458e-02  157.59   <2e-16 ***
## xcuad       -2.735e-02  4.063e-04  -67.30   <2e-16 ***
## xcub         1.903e-04  3.683e-06   51.67   <2e-16 ***
## xcta        -4.562e-07  1.035e-08  -44.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.055 on 38108 degrees of freedom
## Multiple R-squared:  0.9109, Adjusted R-squared:  0.9109 
## F-statistic: 9.741e+04 on 4 and 38108 DF,  p-value: < 2.2e-16

Extraer coeficientes de la información contenida en summary

beta0<-regresionPolinomica$coefficients[1]
beta1<-regresionPolinomica$coefficients[2]
beta2<-regresionPolinomica$coefficients[3]
beta3<-regresionPolinomica$coefficients[4]
beta4<-regresionPolinomica$coefficients[5]
a<-beta0
b<-beta1
c<-beta2
d<-beta3
e<-beta4

GRÁFICA REGRESION POLINÓMICA

plot(x,y,col=5,pch=7,main = "Regresión Polinómica",
     xlab="Consumo del Combutible Primario en Ciudad (MPG) Sin ajustar",ylab="Consumo del Combutible Primario en Carretera (MPG) Sin ajustar",
     xlim =c(0,224.8),ylim =c(0,182.7))
curve(a+(b*x)+(c*x^2)+(d*x^3)+(e*x^4),add=T,col="blue")

Test de Bondad

R <- cor(x, y, use = "complete.obs")
R
## [1] 0.9214694

REGRESIÓN POLINÓMICA DE 5° GRADO - DEPURADO

Cargar y limpiar datos

x <- datos$Unadjusted.City.MPG..FT1.
y <- datos$Unadjusted.Highway.MPG..FT1.
datos <- na.omit(data.frame(x = x, y = y))

Agregar términos polinómicos hasta grado 5

datos$x2 <- datos$x^2
datos$x3 <- datos$x^3
datos$x4 <- datos$x^4
datos$x5 <- datos$x^5

Ajustar modelo polinómico de grado 5

modelo <- lm(y ~ x + x2 + x3 + x4 + x5, data = datos)

Coeficientes

coef <- coef(modelo)
a <- coef[1]; b <- coef[2]; c <- coef[3]; d <- coef[4]; e <- coef[5]; f <- coef[6]

Test de Bondad

R <- cor(datos$x, datos$y)

Secuencia para curva

x_seq <- seq(min(datos$x), max(datos$x), length.out = 400)
y_pred <- a + b*x_seq + c*x_seq^2 + d*x_seq^3 + e*x_seq^4 + f*x_seq^5

Graficar puntos

plot(datos$x, datos$y,
     col = rgb(0, 1, 1, 0.4), pch = 0, cex = 0.8,
     main = "Regresión Polinómica",
     xlab = "Consumo del Combustible Primario en Ciudad (MPG) SIN ajustar",
     ylab = "Consumo del Combustible Primario en Carretera (MPG) SIN ajustar",
     xlim = c(0, max(datos$x)*1.05),
     ylim = c(0, max(datos$y)*1.05))
lines(x_seq, y_pred, col = "blue", lwd = 2)
legend("topleft",
       legend = paste0("R = ", round(R, 4)),
       col = "blue", lwd = 2, bty = "n", cex = 0.9)

Dibujar curva ajustada

MODELO 3D

Cargar los datos

setwd("/cloud/project")
datos<-read.csv("database.csv", header =TRUE,sep=",",dec=".")

Suponiendo que datos ya existe y no tiene NAs, o los reemplazaste así:

datos$Annual.Fuel.Cost..FT1.[is.na(datos$Annual.Fuel.Cost..FT1.)] <- 1970.67
datos$Engine.Displacement[is.na(datos$Engine.Displacement)] <- 3.317
datos$Combined.MPG..FT1.[is.na(datos$Combined.MPG..FT1.)] <- 20.216

x <- datos$Annual.Fuel.Cost..FT1.
y <- datos$Engine.Displacement
z <- datos$Combined.MPG..FT1.

Ajustar modelo lineal múltiple

modelo <- lm(x ~ y + z)

Calcular R y R2 del modelo múltiple

R <- sqrt(summary(modelo)$r.squared)

Crear rejilla para plano de regresión

y_seq <- seq(min(y), max(y), length.out = 30)
z_seq <- seq(min(z), max(z), length.out = 30)
grid <- expand.grid(y = y_seq, z = z_seq)
grid$pred <- predict(modelo, newdata = grid)

Crear texto con R y R2 para la leyenda

texto_leyenda <- paste0("R = ", round(R, 4))

Graficar con plotly incluyendo la leyenda

plot_ly() %>%
  add_markers(x = y, y = z, z = x, marker = list(color = 'blue'), name = "Datos") %>%
  add_surface(x = y_seq, y = z_seq, z = matrix(grid$pred, nrow=30), opacity = 0.5, name = "Plano de Regresión") %>%
  layout(scene = list(
    xaxis = list(title = "Volúmen del motor (L)"),
    yaxis = list(title = "Consumo de combustible combinado(MPG)"),
    zaxis = list(title = "Costo Anual del combustible primario($)"),
    annotations = list(
      list(
        x = min(y_seq), y = max(z_seq), z = max(x),
        text = texto_leyenda,
        showarrow = FALSE,
        font = list(color = "black", size = 14),
        bgcolor = 'rgba(255, 255, 255, 0.7)',
        xanchor = "left",
        yanchor = "top"
      )
    )
  ))

MODELO DEPURADO 3D

Ajustar modelo polinomial cuadrático

modelo_poly <- lm(x ~ y + z + I(y^2) + I(z^2) + I(y*z))

Test de Bondad

R_poly <- sqrt(summary(modelo_poly)$r.squared)
R2_poly <- summary(modelo_poly)$r.squared * 100

Crear rejilla para plano de regresión

y_seq <- seq(min(y), max(y), length.out = 30)
z_seq <- seq(min(z), max(z), length.out = 30)
grid <- expand.grid(y = y_seq, z = z_seq)

Predecir

grid$pred <- predict(modelo_poly, newdata = grid)

Recortar predicciones para mantener rango [500, 6050]

grid$pred <- pmax(pmin(grid$pred, 6050), 500)

Crear texto con R y R2

texto_leyenda_poly <- paste0("R = ", round(R_poly, 4), "\nR² = ", round(R2_poly, 2), "%")

Graficar con plotly

library(plotly)
plot_ly() %>%
  add_markers(x = y, y = z, z = x, marker = list(color = 'blue'), name = "Datos") %>%
  add_surface(x = y_seq, y = z_seq, z = matrix(grid$pred, nrow=30), opacity = 0.5, name = "Plano Polinomial") %>%
  layout(scene = list(
    xaxis = list(title = "Volúmen del motor (L)"),
    yaxis = list(title = "Consumo de combustible combinado(MPG)"),
    zaxis = list(title = "Costo Anual del combustible primario($)"),
    annotations = list(
      list(
        x = min(y_seq), y = max(z_seq), z = max(x),
        text = texto_leyenda_poly,
        showarrow = FALSE,
        font = list(color = "black", size = 14),
        bgcolor = 'rgba(255, 255, 255, 0.7)',
        xanchor = "left",
        yanchor = "top"
      )
    )
  ))