MODELOS DE REGRESIÓN SIMPLE

CARGAR DATOS

# Work from the project folder so the relative file name below resolves there.
setwd("/cloud/project")
# Read the comma-separated table: the first row holds the column names and
# "." is the decimal mark.
datos <- read.csv(
  file = "database.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

CARGAR LIBRERÍAS Y PAQUETES

library(e1071)
library(PASWR)
## Loading required package: lattice
library(SmartEDA)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

VISUALIZAR MI TABLA

# Print the structure of the loaded table: one line per column with its type
# and first values.
str(datos)
## 'data.frame':    38113 obs. of  81 variables:
##  $ Vehicle.ID                         : int  26587 27705 26561 27681 27550 28426 27549 28425 27593 28455 ...
##  $ Year                               : int  1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 ...
##  $ Make                               : chr  "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" ...
##  $ Model                              : chr  "GT V6 2.5" "GT V6 2.5" "Spider Veloce 2000" "Spider Veloce 2000" ...
##  $ Class                              : chr  "Minicompact Cars" "Minicompact Cars" "Two Seaters" "Two Seaters" ...
##  $ Drive                              : chr  "" "" "" "" ...
##  $ Transmission                       : chr  "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" ...
##  $ Transmission.Descriptor            : chr  "" "" "" "" ...
##  $ Engine.Index                       : int  9001 9005 9002 9006 1830 1880 1831 1881 1524 1574 ...
##  $ Engine.Descriptor                  : chr  "(FFS)" "(FFS) CA model" "(FFS)" "(FFS) CA model" ...
##  $ Engine.Cylinders                   : int  6 6 4 4 4 4 6 6 6 6 ...
##  $ Engine.Displacement                : num  2.5 2.5 2 2 2.5 2.5 4.2 4.2 4.2 4.2 ...
##  $ Turbocharger                       : logi  NA NA NA NA NA NA ...
##  $ Supercharger                       : chr  "" "" "" "" ...
##  $ Fuel.Type                          : chr  "Regular" "Regular" "Regular" "Regular" ...
##  $ Fuel.Type.1                        : chr  "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" ...
##  $ Fuel.Type.2                        : chr  "" "" "" "" ...
##  $ City.MPG..FT1.                     : int  17 17 18 18 18 18 13 13 15 15 ...
##  $ Unrounded.City.MPG..FT1.           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.MPG..FT2.                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.City.MPG..FT2.           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Gasoline.Consumption..CD.     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Electricity.Consumption       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Utility.Factor                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.MPG..FT1.                  : int  24 24 25 25 17 17 13 13 20 19 ...
##  $ Unrounded.Highway.MPG..FT1.        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.MPG..FT2.                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.Highway.MPG..FT2.        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Gasoline.Consumption..CD.  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Electricity.Consumption    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Utility.Factor             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unadjusted.City.MPG..FT1.          : num  21 21 23 23 22 22 16 16 19 19 ...
##  $ Unadjusted.Highway.MPG..FT1.       : num  34 34 35 35 24 24 18 18 27 26 ...
##  $ Unadjusted.City.MPG..FT2.          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unadjusted.Highway.MPG..FT2.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.MPG..FT1.                 : int  20 20 21 21 17 17 13 13 17 17 ...
##  $ Unrounded.Combined.MPG..FT1.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.MPG..FT2.                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.Combined.MPG..FT2.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Electricity.Consumption   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Gasoline.Consumption..CD. : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Utility.Factor            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Annual.Fuel.Cost..FT1.             : int  1750 1750 1650 1650 2050 2050 2700 2700 2050 2050 ...
##  $ Annual.Fuel.Cost..FT2.             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gas.Guzzler.Tax                    : chr  "" "" "" "" ...
##  $ Save.or.Spend..5.Year.             : int  -2000 -2000 -1500 -1500 -3500 -3500 -6750 -6750 -3500 -3500 ...
##  $ Annual.Consumption.in.Barrels..FT1.: num  16.5 16.5 15.7 15.7 19.4 ...
##  $ Annual.Consumption.in.Barrels..FT2.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Tailpipe.CO2..FT1.                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ Tailpipe.CO2.in.Grams.Mile..FT1.   : num  444 444 423 423 523 ...
##  $ Tailpipe.CO2..FT2.                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ Tailpipe.CO2.in.Grams.Mile..FT2.   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fuel.Economy.Score                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ GHG.Score                          : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ GHG.Score..Alt.Fuel.               : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ My.MPG.Data                        : chr  "N" "N" "N" "N" ...
##  $ X2D.Passenger.Volume               : int  74 74 0 0 0 0 0 0 0 0 ...
##  $ X2D.Luggage.Volume                 : int  7 7 0 0 0 0 0 0 0 0 ...
##  $ X4D.Passenger.Volume               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X4D.Luggage.Volume                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hatchback.Passenger.Volume         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hatchback.Luggage.Volume           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Start.Stop.Technology              : chr  "" "" "" "" ...
##  $ Alternative.Fuel.Technology        : chr  "" "" "" "" ...
##  $ Electric.Motor                     : chr  "" "" "" "" ...
##  $ Manufacturer.Code                  : chr  "" "" "" "" ...
##  $ Gasoline.Electricity.Blended..CD.  : chr  "False" "False" "False" "False" ...
##  $ Vehicle.Charger                    : chr  "" "" "" "" ...
##  $ Alternate.Charger                  : chr  "" "" "" "" ...
##  $ Hours.to.Charge..120V.             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hours.to.Charge..240V.             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hours.to.Charge..AC.240V.          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.City.MPG                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.Highway.MPG              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.Combined.MPG             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Range..FT1.                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Range..FT1.                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Range..FT1.                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Range..FT2.                        : chr  "" "" "" "" ...
##  $ City.Range..FT2.                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Range..FT2.                : num  0 0 0 0 0 0 0 0 0 0 ...

VERIFICAR LA POSICIÓN DE MI VARIABLE

# Report the position of each analysis variable among the columns of `datos`.
which(colnames(datos) == "Annual.Fuel.Cost..FT1.")
## [1] 43
which(colnames(datos) == "Engine.Displacement")
## [1] 12
which(colnames(datos) == "City.MPG..FT1.")
## [1] 18
which(colnames(datos) == "Highway.MPG..FT1.")
## [1] 25
which(colnames(datos) == "Combined.MPG..FT1.")
## [1] 36
which(colnames(datos) == "Unadjusted.City.MPG..FT1.")
## [1] 32
which(colnames(datos) == "Unadjusted.Highway.MPG..FT1.")
## [1] 33

EXTRAER DATOS

# Pull each analysis variable out of `datos` as a plain vector.
# Columns are selected by name rather than by position, so the extraction keeps
# returning the intended variable even if the column order of the file changes.
DatCostAnuCmbFT1 <- datos[["Annual.Fuel.Cost..FT1."]]
DatDesplazamientoMotor <- datos[["Engine.Displacement"]]
DatCiudadmpgFT1 <- datos[["City.MPG..FT1."]]
DatCarreterampgFT1 <- datos[["Highway.MPG..FT1."]]
DatCombustibleCombmpgFT1 <- datos[["Combined.MPG..FT1."]]
DatCorrCiudadmpgFT1 <- datos[["Unadjusted.City.MPG..FT1."]]
DatCorrCarreterampgFT1 <- datos[["Unadjusted.Highway.MPG..FT1."]]

RESUMEN DE ESTADÍSTICA DESCRIPTIVA DE CADA VARIABLE

# Minimum, quartiles, mean and maximum of each extracted variable.
# summary() adds an "NA's" column when the vector contains missing values,
# as happens below for the engine displacement.
summary(DatCostAnuCmbFT1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     500    1600    1950    1971    2350    6050
summary(DatDesplazamientoMotor)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   2.200   3.000   3.318   4.300   8.400     134
summary(DatCiudadmpgFT1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   15.00   17.00   17.98   20.00  150.00
summary(DatCarreterampgFT1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.00   20.00   24.00   24.08   27.00  122.00
summary(DatCombustibleCombmpgFT1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.00   17.00   19.00   20.22   23.00  136.00
summary(DatCorrCiudadmpgFT1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   18.00   21.05   22.65   25.20  224.80
summary(DatCorrCarreterampgFT1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   27.12   33.00   33.68   38.20  182.70

REGRESIÓN SIMPLE (BIVARIADA)

Annual Fuel Cost (FT1) y Combined MPG (FT1)

# Correlation matrix between annual fuel cost and combined MPG.
# The columns are picked by name so the result does not depend on column order.
cor(datos[, c("Annual.Fuel.Cost..FT1.", "Combined.MPG..FT1.")])
##                        Annual.Fuel.Cost..FT1. Combined.MPG..FT1.
## Annual.Fuel.Cost..FT1.              1.0000000         -0.7448504
## Combined.MPG..FT1.                 -0.7448504          1.0000000

Annual Fuel Cost (FT1) y City MPG (FT1)

# Correlation matrix between annual fuel cost and city MPG.
# The columns are picked by name so the result does not depend on column order.
cor(datos[, c("Annual.Fuel.Cost..FT1.", "City.MPG..FT1.")])
##                        Annual.Fuel.Cost..FT1. City.MPG..FT1.
## Annual.Fuel.Cost..FT1.              1.0000000     -0.6920496
## City.MPG..FT1.                     -0.6920496      1.0000000

City MPG (FT1) y Highway MPG (FT1)

# Correlation matrix between city MPG and highway MPG.
# The columns are picked by name so the result does not depend on column order.
cor(datos[, c("City.MPG..FT1.", "Highway.MPG..FT1.")])
##                   City.MPG..FT1. Highway.MPG..FT1.
## City.MPG..FT1.         1.0000000         0.9317386
## Highway.MPG..FT1.      0.9317386         1.0000000

MODELO N1 (Lineal) SIN DEPURAR

Cargar datos

# Read the comma-separated table again: the first row holds the column names
# and "." is the decimal mark.
datos <- read.csv(
  file = "database.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

Diagrama de dispersión

# Predictor: engine displacement. Response: annual fuel cost (FT1).
x <- datos$Engine.Displacement
y <- datos$Annual.Fuel.Cost..FT1.

# Single scatter plot with the predictor on the horizontal axis, matching the
# orientation of the model y ~ x that is fitted next.
plot(x, y)

Cálculo de la regresión

# Least-squares straight line of y on x; rows with a missing value are omitted.
regresionLineal <- lm(y ~ x, na.action = na.omit)
# Show the call and the estimated coefficients.
print(regresionLineal)
## 
## Call:
## lm(formula = y ~ x, na.action = na.omit)
## 
## Coefficients:
## (Intercept)            x  
##       977.5        300.6

Resumen del modelo de regresión

# Residual quantiles, coefficient table, R-squared and F-test of the linear fit.
print(summary(regresionLineal))
## 
## Call:
## lm(formula = y ~ x, na.action = na.omit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1181.2  -209.7   -29.7   171.2  3509.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  977.530      4.515   216.5   <2e-16 ***
## x            300.617      1.259   238.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 334.2 on 37977 degrees of freedom
##   (134 observations deleted due to missingness)
## Multiple R-squared:  0.6002, Adjusted R-squared:  0.6002 
## F-statistic: 5.701e+04 on 1 and 37977 DF,  p-value: < 2.2e-16

Gráfica de regresión lineal

# Scatter plot of the observations with the fitted straight line on top.
plot(
  x, y,
  main = "Regresión Lineal",
  xlab = "Volúmen del motor",
  ylab = "Costo Anual del Combustible Primario",
  pch = 7,
  col = 4
)
abline(regresionLineal, col = "green")

Test de bondad

# Goodness of fit: correlation between x and y over the complete pairs, and
# its square expressed as a percentage. Both values are displayed, as in the
# other goodness-of-fit sections of this report.
R <- cor(x, y, use = "complete.obs")
R
R2 <- R^2 * 100
R2

MODELO LINEAL DEPURADO

Generar y asignar X, Y

# Predictor: engine displacement. Response: annual fuel cost (FT1).
x <- datos$Engine.Displacement
y <- datos$Annual.Fuel.Cost..FT1.

# Scatter plot with the predictor on the horizontal axis, as in the model.
plot(x, y)

# Keep the (x, y) pairs in their own data frame so the full `datos` table is
# not overwritten by a two-column version.
datos_xy <- data.frame(x = x, y = y)

Agrupar por x y obtener promedio de y

# One row per distinct x value holding the mean of y observed at that x.
# Rows whose x is missing form their own NA group.
promedios_xy <- datos_xy %>%
  group_by(x) %>%
  summarise(y_prom = mean(y, na.rm = TRUE))

Ajustar modelo con los puntos promediados

# Straight-line fit through the per-x averages instead of the raw observations.
modelo_prom <- lm(y_prom ~ x, data = promedios_xy)
# Residual quantiles, coefficient table, R-squared and F-test of that fit.
print(summary(modelo_prom))
## 
## Call:
## lm(formula = y_prom ~ x, data = promedios_xy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -831.32 -135.18   -7.16  162.08  708.73 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   957.73      69.25   13.83   <2e-16 ***
## x             310.71      15.31   20.30   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 257.1 on 64 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8655, Adjusted R-squared:  0.8634 
## F-statistic:   412 on 1 and 64 DF,  p-value: < 2.2e-16

Gráfica con puntos promediados y línea de regresión

# Raw observations as nearly transparent blue dots in the background.
plot(
  x, y,
  pch = 16,
  col = rgb(red = 0, green = 0, blue = 1, alpha = 0.1),
  main = "Regresión lineal con promedios por X",
  xlab = "Volúmen del motor",
  ylab = "Costo anual del combustible"
)

# Per-x averages in red, then the line fitted through them.
with(promedios_xy, points(x, y_prom, pch = 19, col = "red"))
abline(modelo_prom, lwd = 2, col = "blue")

Test de bondad

# Correlation between x and the averaged response, using complete pairs only.
Rm <- with(promedios_xy, cor(x, y_prom, use = "complete.obs"))
print(Rm)
## [1] 0.9303469
# Squared correlation expressed as a percentage.
Rm2 <- 100 * Rm^2
print(Rm2)
## [1] 86.55453

Cargar los datos

# Work from the project folder and read the full table again, so the next
# model starts from the complete data set.
setwd("/cloud/project")
datos <- read.csv(
  file = "database.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

MODELO N2 (POTENCIAL)

Diagrama de dispersión

# Predictor: combined MPG (FT1). Response: annual fuel cost (FT1).
x <- datos$Combined.MPG..FT1.
y <- datos$Annual.Fuel.Cost..FT1.
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.00   17.00   19.00   20.22   23.00  136.00
summary(y)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     500    1600    1950    1971    2350    6050

# Scatter plot with the predictor on the horizontal axis, as in the model.
plot(x, y)

# Taking logs of both variables turns the power law y = a * x^b into the
# straight line log(y) = log(a) + b * log(x), which lm() can fit.
x1 <- log(x)
y1 <- log(y)

Cálculo de la regresión

# Straight-line fit on the log-log scale; rows with a missing value are omitted.
regresionPotencial <- lm(y1 ~ x1, na.action = na.omit)
print(regresionPotencial)
## 
## Call:
## lm(formula = y1 ~ x1, na.action = na.omit)
## 
## Coefficients:
## (Intercept)           x1  
##     10.4313      -0.9706
# Residual quantiles, coefficient table, R-squared and F-test of that fit.
print(summary(regresionPotencial))
## 
## Call:
## lm(formula = y1 ~ x1, na.action = na.omit)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19389 -0.05617 -0.05022  0.11857  0.57043 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.431254   0.005239  1991.2   <2e-16 ***
## x1          -0.970617   0.001758  -552.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09039 on 38111 degrees of freedom
## Multiple R-squared:  0.8889, Adjusted R-squared:  0.8889 
## F-statistic: 3.049e+05 on 1 and 38111 DF,  p-value: < 2.2e-16

Extraer coeficientes de la información contenida en summary

# Coefficients of the log-log line: intercept first, slope second.
beta0 <- coef(regresionPotencial)[1]
beta1 <- coef(regresionPotencial)[2]
# Back-transform to the power-law form y = a * x^b: the intercept is log(a)
# and the slope is the exponent b.
a <- exp(beta0)
b <- beta1

Gráfica Modelo Potencial

# Scatter plot of the observations with the fitted power curve on top.
plot(x, y, col = 5, pch = 7, main = "Regresión Potencial",
     xlab = "Combustible Primario Combinado en MPG",
     ylab = "Costo Anual del Combustible Primario")

# Draw the curve over the observed range of x. Starting at 0 would evaluate
# a * 0^b with a negative exponent (infinite), and a fixed upper limit of 100
# would stop the curve before the largest observed x values.
rango_x <- range(x, na.rm = TRUE)
curve(a * x^b, from = rango_x[1], to = rango_x[2], add = TRUE, col = "yellow")

Test de bondad

# Correlation between the log-transformed variables, using complete pairs only.
R <- cor(x1, y1, use = "complete.obs")
print(R)
## [1] -0.9428029
# Squared correlation expressed as a percentage.
R2 <- 100 * R^2
print(R2)
## [1] 88.88773