title: “modelamiento predictivo” author: “DIlmer Sandoval” date: “2024-01-27” output: html_document
# Read the comma-separated listings file into a data frame named `honda`.
honda <- read.csv(file = "honda_sell_data.csv", sep = ",")
# Print the first rows for a quick visual check of the columns.
head(honda)
## Year Make Model Condition Price Consumer_Rating
## 1 2023 Honda Ridgeline RTL New $46,370 4.8
## 2 2023 Honda CR-V Hybrid Sport New $34,150 1.7
## 3 2023 Honda CR-V Hybrid Sport New $34,245 4.7
## 4 2022 Honda Pilot TrailSport New $46,500 5.0
## 5 2023 Honda CR-V Hybrid Sport Touring New $40,395 4.4
## 6 2023 Honda Odyssey EX-L New $42,250 4.4
## Consumer_Review_. Exterior_Color Interior_Color Drivetrain
## 1 9 Platinum White Pearl Beige All-wheel Drive
## 2 24 Platinum White Pearl Black FWD
## 3 2869 Meteorite Gray Metallic Black Front-wheel Drive
## 4 12 Sonic Gray Pearl Black All-wheel Drive
## 5 12 Crystal Black Pearl Black All-wheel Drive
## 6 12 Pacific Pewter Metallic Mocha Front-wheel Drive
## MPG Fuel_Type Transmission Engine
## 1 Gasoline Automatic 3.5L V6 24V GDI SOHC
## 2 Hybrid Automatic CVT 2.0L I4 16V GDI DOHC Hybrid
## 3 Hybrid Automatic CVT 2.0L I4 16V GDI DOHC Hybrid
## 4 19–25 Gasoline 9-Speed Automatic 3.5L V6 24V GDI SOHC
## 5 Hybrid Automatic CVT 2.0L I4 16V GDI DOHC Hybrid
## 6 Gasoline 10-Speed Automatic 3.5L V6 24V GDI SOHC
## VIN Stock_. Mileage Comfort_Rating Interior_Design_Rating
## 1 5FPYK3F58PB011817 00830164 10 5 4.8
## 2 2HKRS5H5XPH702953 6402953 – 5 3.0
## 3 2HKRS5H55PH704805 PH704805 1 5 3.0
## 4 5FNYF6H82NB100429 5700429B 5 5 5.0
## 5 2HKRS6H90PH805954 5705954 5 5 3.0
## 6 5FNRL6H67PB024270 5724270 5 5 5.0
## Performance_Rating Value_For_Money_Rating Exterior_Styling_Rating
## 1 4.8 4.2 5
## 2 4.0 4.0 5
## 3 4.0 4.0 5
## 4 5.0 5.0 5
## 5 4.0 4.0 5
## 6 4.0 4.0 4
## Reliability_Rating State Seller_Type
## 1 5 CA Dealer
## 2 5 CA Dealer
## 3 5 CA Dealer
## 4 5 CA Dealer
## 5 5 CA Dealer
## 6 5 CA Dealer
# Print the structure of `honda`: dimensions plus each column's type and first values.
str(honda)
## 'data.frame': 4999 obs. of 25 variables:
## $ Year : int 2023 2023 2023 2022 2023 2023 2023 2023 2023 2023 ...
## $ Make : chr "Honda" "Honda" "Honda" "Honda" ...
## $ Model : chr "Ridgeline RTL" "CR-V Hybrid Sport" "CR-V Hybrid Sport" "Pilot TrailSport" ...
## $ Condition : chr "New" "New" "New" "New" ...
## $ Price : chr "$46,370" "$34,150" "$34,245" "$46,500" ...
## $ Consumer_Rating : num 4.8 1.7 4.7 5 4.4 4.4 4.4 1.2 1.2 1.2 ...
## $ Consumer_Review_. : int 9 24 2869 12 12 12 12 70 70 70 ...
## $ Exterior_Color : chr "Platinum White Pearl" "Platinum White Pearl" "Meteorite Gray Metallic" "Sonic Gray Pearl" ...
## $ Interior_Color : chr "Beige" "Black" "Black" "Black" ...
## $ Drivetrain : chr "All-wheel Drive" "FWD" "Front-wheel Drive" "All-wheel Drive" ...
## $ MPG : chr "" "" "" "19–25" ...
## $ Fuel_Type : chr "Gasoline" "Hybrid" "Hybrid" "Gasoline" ...
## $ Transmission : chr "Automatic" "Automatic CVT" "Automatic CVT" "9-Speed Automatic" ...
## $ Engine : chr "3.5L V6 24V GDI SOHC" "2.0L I4 16V GDI DOHC Hybrid" "2.0L I4 16V GDI DOHC Hybrid" "3.5L V6 24V GDI SOHC" ...
## $ VIN : chr "5FPYK3F58PB011817" "2HKRS5H5XPH702953" "2HKRS5H55PH704805" "5FNYF6H82NB100429" ...
## $ Stock_. : chr "00830164" "6402953" "PH704805" "5700429B" ...
## $ Mileage : chr "10" "–" "1" "5" ...
## $ Comfort_Rating : num 5 5 5 5 5 5 5 5 5 5 ...
## $ Interior_Design_Rating : num 4.8 3 3 5 3 5 3 3 3 3 ...
## $ Performance_Rating : num 4.8 4 4 5 4 4 4 4 4 4 ...
## $ Value_For_Money_Rating : num 4.2 4 4 5 4 4 4 4 4 4 ...
## $ Exterior_Styling_Rating: num 5 5 5 5 5 4 5 5 5 5 ...
## $ Reliability_Rating : num 5 5 5 5 5 5 5 5 5 5 ...
## $ State : chr "CA" "CA" "CA" "CA" ...
## $ Seller_Type : chr "Dealer" "Dealer" "Dealer" "Dealer" ...
Pasos para construir un modelo de regresión
Paso 1: Determinar las variables X, Y
Graficamos mediante un diagrama de dispersión
# Paso 0: Cargar las librerías necesarias
# install.packages("ggplot2") # Ejecutar solo si no tienes la librería instalada
# install.packages("psych") # Ejecutar solo si no tienes la librería instalada
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(psych)
## Warning: package 'psych' was built under R version 4.3.2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Step 1: build the two-column data set used by the regression.
# NOTE(review): this assignment replaces whatever `honda` held before (the CSV
# is read into the same name at the top of the file) with six hard-coded rows,
# so everything below is fitted on six observations only -- confirm that this
# small sample is intentional.
honda <- data.frame(
  Performance_Rating     = c(4.8, 4, 4, 5, 4, 4),
  Value_For_Money_Rating = c(4.2, 4, 4, 5, 4, 4)
)

# Step 2: scatter plot, predictor on the x axis and response on the y axis.
plot(
  Value_For_Money_Rating ~ Performance_Rating,
  data = honda,
  xlab = "Performance Rating",
  ylab = "Value for Money Rating",
  main = "Scatter Plot of Performance vs Value for Money"
)

# Step 3: simple linear regression of Value_For_Money_Rating on
# Performance_Rating. The call text is echoed by summary(), so it is kept as is.
modelo <- lm(Value_For_Money_Rating ~ Performance_Rating, data = honda)

# Step 4: overlay the fitted line on the scatter plot in red.
abline(modelo, col = "red")

# Step 5: print coefficients, residuals and fit statistics.
summary(modelo)
##
## Call:
## lm(formula = Value_For_Money_Rating ~ Performance_Rating, data = honda)
##
## Residuals:
## 1 2 3 4 5 6
## -0.36364 0.01818 0.01818 0.29091 0.01818 0.01818
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0727 0.9623 1.115 0.3274
## Performance_Rating 0.7273 0.2227 3.266 0.0309 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2335 on 4 degrees of freedom
## Multiple R-squared: 0.7273, Adjusted R-squared: 0.6591
## F-statistic: 10.67 on 1 and 4 DF, p-value: 0.03091
# Step 6: draw the scatter-plot matrix of `honda` with psych::pairs.panels().
# The figure is drawn on the current graphics device. No dev.new() call is made
# here: a device opened by hand is separate from the one a report renderer
# records, so the figure could be left out of the rendered document.
pairs.panels(honda)

# Correlation coefficient (cor() default method) between Performance_Rating
# and Value_For_Money_Rating.
correlation_performance_value <- cor(
  honda$Performance_Rating,
  honda$Value_For_Money_Rating
)

# Print the label followed by the coefficient and a trailing newline.
cat("Coeficiente de correlación entre Performance_Rating y Value_For_Money_Rating:", correlation_performance_value, "\n")
## Coeficiente de correlación entre Performance_Rating y Value_For_Money_Rating: 0.8528029
# If there are more variables, repeat this process for each pair of variables.
# lm notation: Y ~ X, data =
# NOTE(review): same formula and data as `modelo` fitted earlier in this file,
# so this refit yields the same model under a second name.
modelo1 <- lm(Value_For_Money_Rating ~ Performance_Rating, data = honda)
# Summary of results: coefficients, residuals and fit statistics.
summary(modelo1)
##
## Call:
## lm(formula = Value_For_Money_Rating ~ Performance_Rating, data = honda)
##
## Residuals:
## 1 2 3 4 5 6
## -0.36364 0.01818 0.01818 0.29091 0.01818 0.01818
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0727 0.9623 1.115 0.3274
## Performance_Rating 0.7273 0.2227 3.266 0.0309 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2335 on 4 degrees of freedom
## Multiple R-squared: 0.7273, Adjusted R-squared: 0.6591
## F-statistic: 10.67 on 1 and 4 DF, p-value: 0.03091
# Model specific to the honda variables.
# NOTE(review): same formula and data as `modelo` and `modelo1` earlier in this
# file; only the object name differs.
modelo_especifico_honda <- lm(Value_For_Money_Rating ~ Performance_Rating, data = honda)
# Summary of the model: coefficients, residuals and fit statistics.
summary(modelo_especifico_honda)
##
## Call:
## lm(formula = Value_For_Money_Rating ~ Performance_Rating, data = honda)
##
## Residuals:
## 1 2 3 4 5 6
## -0.36364 0.01818 0.01818 0.29091 0.01818 0.01818
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0727 0.9623 1.115 0.3274
## Performance_Rating 0.7273 0.2227 3.266 0.0309 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2335 on 4 degrees of freedom
## Multiple R-squared: 0.7273, Adjusted R-squared: 0.6591
## F-statistic: 10.67 on 1 and 4 DF, p-value: 0.03091