# Work from the project directory and load the dataset from CSV
# (comma-separated fields, "." as decimal mark, first row holds the names).
setwd("/cloud/project")
datos <- read.csv(
  file = "database.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)
library(e1071)
library(PASWR)
## Loading required package: lattice
library(SmartEDA)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Show the type and the first values of every column of the loaded data frame.
str(object = datos)
## 'data.frame': 38113 obs. of 81 variables:
## $ Vehicle.ID : int 26587 27705 26561 27681 27550 28426 27549 28425 27593 28455 ...
## $ Year : int 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 ...
## $ Make : chr "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" ...
## $ Model : chr "GT V6 2.5" "GT V6 2.5" "Spider Veloce 2000" "Spider Veloce 2000" ...
## $ Class : chr "Minicompact Cars" "Minicompact Cars" "Two Seaters" "Two Seaters" ...
## $ Drive : chr "" "" "" "" ...
## $ Transmission : chr "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" ...
## $ Transmission.Descriptor : chr "" "" "" "" ...
## $ Engine.Index : int 9001 9005 9002 9006 1830 1880 1831 1881 1524 1574 ...
## $ Engine.Descriptor : chr "(FFS)" "(FFS) CA model" "(FFS)" "(FFS) CA model" ...
## $ Engine.Cylinders : int 6 6 4 4 4 4 6 6 6 6 ...
## $ Engine.Displacement : num 2.5 2.5 2 2 2.5 2.5 4.2 4.2 4.2 4.2 ...
## $ Turbocharger : logi NA NA NA NA NA NA ...
## $ Supercharger : chr "" "" "" "" ...
## $ Fuel.Type : chr "Regular" "Regular" "Regular" "Regular" ...
## $ Fuel.Type.1 : chr "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" ...
## $ Fuel.Type.2 : chr "" "" "" "" ...
## $ City.MPG..FT1. : int 17 17 18 18 18 18 13 13 15 15 ...
## $ Unrounded.City.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.City.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.MPG..FT1. : int 24 24 25 25 17 17 13 13 20 19 ...
## $ Unrounded.Highway.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.Highway.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Unadjusted.City.MPG..FT1. : num 21 21 23 23 22 22 16 16 19 19 ...
## $ Unadjusted.Highway.MPG..FT1. : num 34 34 35 35 24 24 18 18 27 26 ...
## $ Unadjusted.City.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Unadjusted.Highway.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.MPG..FT1. : int 20 20 21 21 17 17 13 13 17 17 ...
## $ Unrounded.Combined.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.Combined.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Annual.Fuel.Cost..FT1. : int 1750 1750 1650 1650 2050 2050 2700 2700 2050 2050 ...
## $ Annual.Fuel.Cost..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Gas.Guzzler.Tax : chr "" "" "" "" ...
## $ Save.or.Spend..5.Year. : int -2000 -2000 -1500 -1500 -3500 -3500 -6750 -6750 -3500 -3500 ...
## $ Annual.Consumption.in.Barrels..FT1.: num 16.5 16.5 15.7 15.7 19.4 ...
## $ Annual.Consumption.in.Barrels..FT2.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Tailpipe.CO2..FT1. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ Tailpipe.CO2.in.Grams.Mile..FT1. : num 444 444 423 423 523 ...
## $ Tailpipe.CO2..FT2. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ Tailpipe.CO2.in.Grams.Mile..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fuel.Economy.Score : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ GHG.Score : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ GHG.Score..Alt.Fuel. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ My.MPG.Data : chr "N" "N" "N" "N" ...
## $ X2D.Passenger.Volume : int 74 74 0 0 0 0 0 0 0 0 ...
## $ X2D.Luggage.Volume : int 7 7 0 0 0 0 0 0 0 0 ...
## $ X4D.Passenger.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ X4D.Luggage.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hatchback.Passenger.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hatchback.Luggage.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Start.Stop.Technology : chr "" "" "" "" ...
## $ Alternative.Fuel.Technology : chr "" "" "" "" ...
## $ Electric.Motor : chr "" "" "" "" ...
## $ Manufacturer.Code : chr "" "" "" "" ...
## $ Gasoline.Electricity.Blended..CD. : chr "False" "False" "False" "False" ...
## $ Vehicle.Charger : chr "" "" "" "" ...
## $ Alternate.Charger : chr "" "" "" "" ...
## $ Hours.to.Charge..120V. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hours.to.Charge..240V. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Hours.to.Charge..AC.240V. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.City.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.Highway.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.Combined.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Range..FT1. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Range..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Range..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Range..FT2. : chr "" "" "" "" ...
## $ City.Range..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Range..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
# Look up the position of each column of interest inside `datos`.
which(colnames(datos) == "Annual.Fuel.Cost..FT1.")
## [1] 43
which(colnames(datos) == "Engine.Displacement")
## [1] 12
which(colnames(datos) == "City.MPG..FT1.")
## [1] 18
which(colnames(datos) == "Highway.MPG..FT1.")
## [1] 25
which(colnames(datos) == "Combined.MPG..FT1.")
## [1] 36
which(colnames(datos) == "Unadjusted.City.MPG..FT1.")
## [1] 32
which(colnames(datos) == "Unadjusted.Highway.MPG..FT1.")
## [1] 33
# Pull the analysis variables out of `datos` by column name rather than by
# position, so a reordered or extended CSV cannot silently select the wrong
# column (an unknown name raises "undefined columns selected").
DatCostAnuCmbFT1 <- datos[, "Annual.Fuel.Cost..FT1."]
DatDesplazamientoMotor <- datos[, "Engine.Displacement"]
DatCiudadmpgFT1 <- datos[, "City.MPG..FT1."]
DatCarreterampgFT1 <- datos[, "Highway.MPG..FT1."]
DatCombustibleCombmpgFT1 <- datos[, "Combined.MPG..FT1."]
DatCorrCiudadmpgFT1 <- datos[, "Unadjusted.City.MPG..FT1."]
DatCorrCarreterampgFT1 <- datos[, "Unadjusted.Highway.MPG..FT1."]
# Five-number summary plus mean for each extracted variable.

# Annual fuel cost (FT1)
summary(object = DatCostAnuCmbFT1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500 1600 1950 1971 2350 6050

# Engine displacement (the only variable here reporting missing values)
summary(object = DatDesplazamientoMotor)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 2.200 3.000 3.318 4.300 8.400 134

# City MPG (FT1)
summary(object = DatCiudadmpgFT1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.00 15.00 17.00 17.98 20.00 150.00

# Highway MPG (FT1)
summary(object = DatCarreterampgFT1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.00 20.00 24.00 24.08 27.00 122.00

# Combined MPG (FT1)
summary(object = DatCombustibleCombmpgFT1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 17.00 19.00 20.22 23.00 136.00

# Unadjusted city MPG (FT1)
summary(object = DatCorrCiudadmpgFT1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 18.00 21.05 22.65 25.20 224.80

# Unadjusted highway MPG (FT1)
summary(object = DatCorrCarreterampgFT1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 27.12 33.00 33.68 38.20 182.70
# Annual Fuel Cost (FT1) y Combined MPG (FT1)
# Correlation matrix of annual fuel cost and combined MPG. Columns are chosen
# by name instead of position so the result does not depend on column order.
cor(datos[, c("Annual.Fuel.Cost..FT1.", "Combined.MPG..FT1.")])
## Annual.Fuel.Cost..FT1. Combined.MPG..FT1.
## Annual.Fuel.Cost..FT1. 1.0000000 -0.7448504
## Combined.MPG..FT1. -0.7448504 1.0000000
# Annual Fuel Cost (FT1) y City MPG (FT1)
# Correlation matrix of annual fuel cost and city MPG. Columns are chosen by
# name instead of position so the result does not depend on column order.
cor(datos[, c("Annual.Fuel.Cost..FT1.", "City.MPG..FT1.")])
## Annual.Fuel.Cost..FT1. City.MPG..FT1.
## Annual.Fuel.Cost..FT1. 1.0000000 -0.6920496
## City.MPG..FT1. -0.6920496 1.0000000
# City MPG (FT1) y Highway MPG (FT1)
# Correlation matrix of city MPG and highway MPG. Columns are chosen by name
# instead of position so the result does not depend on column order.
cor(datos[, c("City.MPG..FT1.", "Highway.MPG..FT1.")])
## City.MPG..FT1. Highway.MPG..FT1.
## City.MPG..FT1. 1.0000000 0.9317386
## Highway.MPG..FT1. 0.9317386 1.0000000
# Reload the dataset and take a first look at annual fuel cost against
# engine displacement.
datos <- read.csv(file = "database.csv", header = TRUE, sep = ",", dec = ".")
plot(datos$Annual.Fuel.Cost..FT1., datos$Engine.Displacement)

# Predictor x: engine displacement; response y: annual fuel cost.
y <- datos[["Annual.Fuel.Cost..FT1."]]
x <- datos[["Engine.Displacement"]]
plot(x, y)

# Fit the simple linear regression; rows with a missing value are dropped.
regresionLineal <- lm(formula = y ~ x, na.action = na.omit)
print(regresionLineal)
##
## Call:
## lm(formula = y ~ x, na.action = na.omit)
##
## Coefficients:
## (Intercept) x
## 977.5 300.6
# Full fit report: residual quantiles, coefficient table, R-squared, F test.
print(summary(regresionLineal))
##
## Call:
## lm(formula = y ~ x, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1181.2 -209.7 -29.7 171.2 3509.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 977.530 4.515 216.5 <2e-16 ***
## x 300.617 1.259 238.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 334.2 on 37977 degrees of freedom
## (134 observations deleted due to missingness)
## Multiple R-squared: 0.6002, Adjusted R-squared: 0.6002
## F-statistic: 5.701e+04 on 1 and 37977 DF, p-value: < 2.2e-16
# Scatter plot of the data with the fitted regression line drawn on top.
plot(x, y, col = 4, pch = 7, main = "Regresión Lineal",
     xlab = "Volúmen del motor", ylab = "Costo Anual del Combustible Primario")
abline(regresionLineal, col = "green")

# Pearson correlation and its square as a percentage, computed on the pairs
# where both values are present. Both are printed so that this section reports
# them the same way the other regression sections of the script do.
R <- cor(x, y, use = "complete.obs")
R
R2 <- R^2 * 100
R2
# Exploratory scatter plots of annual fuel cost and engine displacement.
plot(datos$Annual.Fuel.Cost..FT1., datos$Engine.Displacement)
x <- datos$Engine.Displacement
y <- datos$Annual.Fuel.Cost..FT1.
plot(x, y)

# Keep the (x, y) pairs in their own data frame instead of overwriting `datos`,
# so the full dataset is still available once this section has run.
datos_xy <- data.frame(x = x, y = y)

# Mean annual fuel cost for each distinct displacement value. Rows whose x is
# NA form a group of their own; lm() below discards that group.
promedios_xy <- datos_xy %>%
  group_by(x) %>%
  summarise(y_prom = mean(y, na.rm = TRUE))

# Linear regression fitted on the per-displacement means.
modelo_prom <- lm(y_prom ~ x, data = promedios_xy)
summary(modelo_prom)
##
## Call:
## lm(formula = y_prom ~ x, data = promedios_xy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -831.32 -135.18 -7.16 162.08 708.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 957.73 69.25 13.83 <2e-16 ***
## x 310.71 15.31 20.30 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 257.1 on 64 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.8655, Adjusted R-squared: 0.8634
## F-statistic: 412 on 1 and 64 DF, p-value: < 2.2e-16
# Raw observations (translucent blue), the per-x means (red) and the line
# fitted to those means.
plot(
  x, y,
  col = rgb(0, 0, 1, alpha = 0.1), pch = 16,
  main = "Regresión lineal con promedios por X",
  xlab = "Volúmen del motor", ylab = "Costo anual del combustible"
)
with(promedios_xy, points(x, y_prom, col = "red", pch = 19))
abline(modelo_prom, col = "blue", lwd = 2)

# Correlation between x and the mean response, and its square as a percentage.
Rm <- with(promedios_xy, cor(x, y_prom, use = "complete.obs"))
print(Rm)
## [1] 0.9303469
Rm2 <- 100 * Rm^2
print(Rm2)
## [1] 86.55453
# Reload the full dataset for the power (log-log) regression.
setwd("/cloud/project")
datos <- read.csv(file = "database.csv", header = TRUE, sep = ",", dec = ".")
plot(datos$Annual.Fuel.Cost..FT1., datos$Combined.MPG..FT1.)

# Predictor x: combined MPG; response y: annual fuel cost.
y <- datos[["Annual.Fuel.Cost..FT1."]]
x <- datos[["Combined.MPG..FT1."]]
summary(object = x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 17.00 19.00 20.22 23.00 136.00
summary(object = y)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500 1600 1950 1971 2350 6050
plot(x, y)

# Regress log(y) on log(x); rows with a missing value are dropped.
y1 <- log(y)
x1 <- log(x)
regresionPotencial <- lm(formula = y1 ~ x1, na.action = na.omit)
print(regresionPotencial)
##
## Call:
## lm(formula = y1 ~ x1, na.action = na.omit)
##
## Coefficients:
## (Intercept) x1
## 10.4313 -0.9706
# Full fit report for the log-log model.
print(summary(regresionPotencial))
##
## Call:
## lm(formula = y1 ~ x1, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.19389 -0.05617 -0.05022 0.11857 0.57043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.431254 0.005239 1991.2 <2e-16 ***
## x1 -0.970617 0.001758 -552.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09039 on 38111 degrees of freedom
## Multiple R-squared: 0.8889, Adjusted R-squared: 0.8889
## F-statistic: 3.049e+05 on 1 and 38111 DF, p-value: < 2.2e-16
# Back-transform the log-log fit to the power form y = a * x^b:
# the intercept is log(a) and the slope is the exponent b.
beta0 <- coef(regresionPotencial)[1]
beta1 <- coef(regresionPotencial)[2]
b <- beta1
a <- exp(beta0)

plot(x, y, col = 5, pch = 7, main = "Regresión Potencial",
     xlab = "Combustible Primario Combinado en MPG",
     ylab = "Costo Anual del Combustible Primario")

# Draw the fitted curve over the observed range of x. A fixed 0-100 range
# evaluates the curve at x = 0 (infinite when b is negative) and stops short
# of the largest observed x values.
rango_x <- range(x, na.rm = TRUE)
curve(a * x^b, from = rango_x[1], to = rango_x[2], add = TRUE, col = "yellow")
# Correlation between the log-transformed variables and its square as a
# percentage.
R <- cor(x = x1, y = y1, use = "complete.obs")
print(R)
## [1] -0.9428029
R2 <- 100 * R^2
print(R2)
## [1] 88.88773