---
title: "Modelamiento predictivo"
author: "DIlmer Sandoval"
date: "2024-01-27"
output: html_document
---

# Read the Honda listings CSV (comma-separated) into a data frame.
honda <- read.csv(file = "honda_sell_data.csv", sep = ",")

# Preview the first rows of the loaded data.
head(x = honda)
##   Year  Make                     Model Condition   Price Consumer_Rating
## 1 2023 Honda             Ridgeline RTL       New $46,370             4.8
## 2 2023 Honda         CR-V Hybrid Sport       New $34,150             1.7
## 3 2023 Honda         CR-V Hybrid Sport       New $34,245             4.7
## 4 2022 Honda          Pilot TrailSport       New $46,500             5.0
## 5 2023 Honda CR-V Hybrid Sport Touring       New $40,395             4.4
## 6 2023 Honda              Odyssey EX-L       New $42,250             4.4
##   Consumer_Review_.          Exterior_Color Interior_Color        Drivetrain
## 1                 9    Platinum White Pearl          Beige   All-wheel Drive
## 2                24    Platinum White Pearl          Black               FWD
## 3              2869 Meteorite Gray Metallic          Black Front-wheel Drive
## 4                12        Sonic Gray Pearl          Black   All-wheel Drive
## 5                12     Crystal Black Pearl          Black   All-wheel Drive
## 6                12 Pacific Pewter Metallic          Mocha Front-wheel Drive
##     MPG Fuel_Type       Transmission                      Engine
## 1        Gasoline          Automatic        3.5L V6 24V GDI SOHC
## 2          Hybrid      Automatic CVT 2.0L I4 16V GDI DOHC Hybrid
## 3          Hybrid      Automatic CVT 2.0L I4 16V GDI DOHC Hybrid
## 4 19–25  Gasoline  9-Speed Automatic        3.5L V6 24V GDI SOHC
## 5          Hybrid      Automatic CVT 2.0L I4 16V GDI DOHC Hybrid
## 6        Gasoline 10-Speed Automatic        3.5L V6 24V GDI SOHC
##                 VIN  Stock_. Mileage Comfort_Rating Interior_Design_Rating
## 1 5FPYK3F58PB011817 00830164      10              5                    4.8
## 2 2HKRS5H5XPH702953  6402953       –              5                    3.0
## 3 2HKRS5H55PH704805 PH704805       1              5                    3.0
## 4 5FNYF6H82NB100429 5700429B       5              5                    5.0
## 5 2HKRS6H90PH805954  5705954       5              5                    3.0
## 6 5FNRL6H67PB024270  5724270       5              5                    5.0
##   Performance_Rating Value_For_Money_Rating Exterior_Styling_Rating
## 1                4.8                    4.2                       5
## 2                4.0                    4.0                       5
## 3                4.0                    4.0                       5
## 4                5.0                    5.0                       5
## 5                4.0                    4.0                       5
## 6                4.0                    4.0                       4
##   Reliability_Rating State Seller_Type
## 1                  5    CA      Dealer
## 2                  5    CA      Dealer
## 3                  5    CA      Dealer
## 4                  5    CA      Dealer
## 5                  5    CA      Dealer
## 6                  5    CA      Dealer
# Inspect the structure of `honda`: number of rows/columns and each column's type.
str(honda)
## 'data.frame':    4999 obs. of  25 variables:
##  $ Year                   : int  2023 2023 2023 2022 2023 2023 2023 2023 2023 2023 ...
##  $ Make                   : chr  "Honda" "Honda" "Honda" "Honda" ...
##  $ Model                  : chr  "Ridgeline RTL" "CR-V Hybrid Sport" "CR-V Hybrid Sport" "Pilot TrailSport" ...
##  $ Condition              : chr  "New" "New" "New" "New" ...
##  $ Price                  : chr  "$46,370" "$34,150" "$34,245" "$46,500" ...
##  $ Consumer_Rating        : num  4.8 1.7 4.7 5 4.4 4.4 4.4 1.2 1.2 1.2 ...
##  $ Consumer_Review_.      : int  9 24 2869 12 12 12 12 70 70 70 ...
##  $ Exterior_Color         : chr  "Platinum White Pearl" "Platinum White Pearl" "Meteorite Gray Metallic" "Sonic Gray Pearl" ...
##  $ Interior_Color         : chr  "Beige" "Black" "Black" "Black" ...
##  $ Drivetrain             : chr  "All-wheel Drive" "FWD" "Front-wheel Drive" "All-wheel Drive" ...
##  $ MPG                    : chr  "" "" "" "19–25" ...
##  $ Fuel_Type              : chr  "Gasoline" "Hybrid" "Hybrid" "Gasoline" ...
##  $ Transmission           : chr  "Automatic" "Automatic CVT" "Automatic CVT" "9-Speed Automatic" ...
##  $ Engine                 : chr  "3.5L V6 24V GDI SOHC" "2.0L I4 16V GDI DOHC Hybrid" "2.0L I4 16V GDI DOHC Hybrid" "3.5L V6 24V GDI SOHC" ...
##  $ VIN                    : chr  "5FPYK3F58PB011817" "2HKRS5H5XPH702953" "2HKRS5H55PH704805" "5FNYF6H82NB100429" ...
##  $ Stock_.                : chr  "00830164" "6402953" "PH704805" "5700429B" ...
##  $ Mileage                : chr  "10" "–" "1" "5" ...
##  $ Comfort_Rating         : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ Interior_Design_Rating : num  4.8 3 3 5 3 5 3 3 3 3 ...
##  $ Performance_Rating     : num  4.8 4 4 5 4 4 4 4 4 4 ...
##  $ Value_For_Money_Rating : num  4.2 4 4 5 4 4 4 4 4 4 ...
##  $ Exterior_Styling_Rating: num  5 5 5 5 5 4 5 5 5 5 ...
##  $ Reliability_Rating     : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ State                  : chr  "CA" "CA" "CA" "CA" ...
##  $ Seller_Type            : chr  "Dealer" "Dealer" "Dealer" "Dealer" ...

Pasos para construir un modelo de regresión

Paso 1: Determinar las variables X e Y (X = Performance_Rating, Y = Value_For_Money_Rating)

Graficamos mediante un diagrama de dispersión

# Paso 0: Cargar las librerías necesarias
# install.packages("ggplot2")  # Ejecutar solo si no tienes la librería instalada
# install.packages("psych")    # Ejecutar solo si no tienes la librería instalada
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(psych)
## Warning: package 'psych' was built under R version 4.3.2
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Step 1: build the working data set.
# NOTE(review): this reassigns `honda`, replacing the data frame read from
# honda_sell_data.csv with six hand-entered rows (the same values head()
# printed for these two columns). Everything that follows therefore uses
# n = 6 -- confirm this is intended rather than modelling the full file.
honda <- data.frame(
  Performance_Rating     = c(4.8, 4.0, 4.0, 5.0, 4.0, 4.0),
  Value_For_Money_Rating = c(4.2, 4.0, 4.0, 5.0, 4.0, 4.0)
)

# Step 2: scatter plot of the predictor (x) against the response (y).
with(
  honda,
  plot(
    x    = Performance_Rating,
    y    = Value_For_Money_Rating,
    xlab = "Performance Rating",
    ylab = "Value for Money Rating",
    main = "Scatter Plot of Performance vs Value for Money"
  )
)

# Step 3: fit a simple linear regression of value-for-money on performance.
modelo <- lm(
  formula = Value_For_Money_Rating ~ Performance_Rating,
  data    = honda
)

# Step 4: overlay the fitted regression line on the scatter plot.
abline(reg = modelo, col = "red")

# Step 5: coefficients, residuals, and fit statistics of the model.
summary(modelo)
## 
## Call:
## lm(formula = Value_For_Money_Rating ~ Performance_Rating, data = honda)
## 
## Residuals:
##        1        2        3        4        5        6 
## -0.36364  0.01818  0.01818  0.29091  0.01818  0.01818 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          1.0727     0.9623   1.115   0.3274  
## Performance_Rating   0.7273     0.2227   3.266   0.0309 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2335 on 4 degrees of freedom
## Multiple R-squared:  0.7273, Adjusted R-squared:  0.6591 
## F-statistic: 10.67 on 1 and 4 DF,  p-value: 0.03091
# Step 6: scatter-plot matrix of the columns in `honda` using
# psych::pairs.panels().
# Deliberately no dev.new() before this call: opening a separate graphics
# device draws the figure away from the device the report is rendered from,
# so the plot would not appear in the knitted document.
pairs.panels(honda)


# Correlation coefficient between Performance_Rating and
# Value_For_Money_Rating (cor() with its default method).
correlation_performance_value <- with(
  honda,
  cor(Performance_Rating, Value_For_Money_Rating)
)

# Print the coefficient with its label.
cat(
  "Coeficiente de correlación entre Performance_Rating y Value_For_Money_Rating:",
  correlation_performance_value,
  "\n"
)
## Coeficiente de correlación entre Performance_Rating y Value_For_Money_Rating: 0.8528029
# Si tienes más variables, repite este proceso para cada par de variables


# Simple linear regression with lm(); notation is `Y ~ X, data = `.
# Same formula and data as `modelo` fitted earlier in this file.
modelo1 <- lm(
  formula = Value_For_Money_Rating ~ Performance_Rating,
  data    = honda
)

# Summary of the results.
summary(object = modelo1)
## 
## Call:
## lm(formula = Value_For_Money_Rating ~ Performance_Rating, data = honda)
## 
## Residuals:
##        1        2        3        4        5        6 
## -0.36364  0.01818  0.01818  0.29091  0.01818  0.01818 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          1.0727     0.9623   1.115   0.3274  
## Performance_Rating   0.7273     0.2227   3.266   0.0309 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2335 on 4 degrees of freedom
## Multiple R-squared:  0.7273, Adjusted R-squared:  0.6591 
## F-statistic: 10.67 on 1 and 4 DF,  p-value: 0.03091
# Model for the `honda` variables: value-for-money regressed on performance.
# Same formula and data as `modelo` fitted earlier in this file.
modelo_especifico_honda <- lm(
  formula = Value_For_Money_Rating ~ Performance_Rating,
  data    = honda
)

# Summary of the model.
summary(object = modelo_especifico_honda)
## 
## Call:
## lm(formula = Value_For_Money_Rating ~ Performance_Rating, data = honda)
## 
## Residuals:
##        1        2        3        4        5        6 
## -0.36364  0.01818  0.01818  0.29091  0.01818  0.01818 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          1.0727     0.9623   1.115   0.3274  
## Performance_Rating   0.7273     0.2227   3.266   0.0309 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2335 on 4 degrees of freedom
## Multiple R-squared:  0.7273, Adjusted R-squared:  0.6591 
## F-statistic: 10.67 on 1 and 4 DF,  p-value: 0.03091