Regresión Lineal

Importar la base de datos desde un archivo CSV

library(readr)
# Read the house-price CSV into `data` using readr::read_csv().
# NOTE(review): the path is absolute and machine-specific, so this line only
# works where the file exists at exactly this location.
data <- read_csv(
  file = "C:/Users/robie/Downloads/HousePriceData.csv"
)
## Rows: 905 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Parking, City_Category
## dbl (8): Observation, Dist_Taxi, Dist_Market, Dist_Hospital, Carpet, Builtup...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

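Alternativa: usar file.choose() para seleccionar el archivo de forma interactiva, por ejemplo read_csv(file.choose()), en lugar de escribir la ruta completa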

Entender la base de datos

# Print the structure of the imported table: each column's name, type and
# first few values.
str(data)
## spc_tbl_ [905 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Observation  : num [1:905] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Dist_Taxi    : num [1:905] 9796 8294 11001 8301 10510 ...
##  $ Dist_Market  : num [1:905] 5250 8186 14399 11188 12629 ...
##  $ Dist_Hospital: num [1:905] 10703 12694 16991 12289 13921 ...
##  $ Carpet       : num [1:905] 1659 1461 1340 1451 1770 ...
##  $ Builtup      : num [1:905] 1961 1752 1609 1748 2111 ...
##  $ Parking      : chr [1:905] "Open" "Not Provided" "Not Provided" "Covered" ...
##  $ City_Category: chr [1:905] "CAT B" "CAT B" "CAT A" "CAT B" ...
##  $ Rainfall     : num [1:905] 530 210 720 620 450 760 1030 1020 680 1130 ...
##  $ House_Price  : num [1:905] 6649000 3982000 5401000 5373000 4662000 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Observation = col_double(),
##   ..   Dist_Taxi = col_double(),
##   ..   Dist_Market = col_double(),
##   ..   Dist_Hospital = col_double(),
##   ..   Carpet = col_double(),
##   ..   Builtup = col_double(),
##   ..   Parking = col_character(),
##   ..   City_Category = col_character(),
##   ..   Rainfall = col_double(),
##   ..   House_Price = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Print per-column summaries: min/quartiles/mean/max (and NA count) for the
# numeric columns, length and class for the character columns.
# NOTE(review): the printed summary shows a negative Rainfall minimum and
# Carpet / House_Price maxima far above their third quartiles -- possibly
# data-entry errors or outliers; confirm before relying on the model.
summary(data)
##   Observation      Dist_Taxi      Dist_Market    Dist_Hospital  
##  Min.   :  1.0   Min.   :  146   Min.   : 1666   Min.   : 3227  
##  1st Qu.:237.0   1st Qu.: 6477   1st Qu.: 9367   1st Qu.:11302  
##  Median :469.0   Median : 8228   Median :11149   Median :13189  
##  Mean   :468.4   Mean   : 8235   Mean   :11022   Mean   :13091  
##  3rd Qu.:700.0   3rd Qu.: 9939   3rd Qu.:12675   3rd Qu.:14855  
##  Max.   :932.0   Max.   :20662   Max.   :20945   Max.   :23294  
##                                                                 
##      Carpet         Builtup        Parking          City_Category     
##  Min.   :  775   Min.   :  932   Length:905         Length:905        
##  1st Qu.: 1317   1st Qu.: 1579   Class :character   Class :character  
##  Median : 1478   Median : 1774   Mode  :character   Mode  :character  
##  Mean   : 1511   Mean   : 1794                                        
##  3rd Qu.: 1654   3rd Qu.: 1985                                        
##  Max.   :24300   Max.   :12730                                        
##  NA's   :7                                                            
##     Rainfall       House_Price       
##  Min.   :-110.0   Min.   :  1492000  
##  1st Qu.: 600.0   1st Qu.:  4623000  
##  Median : 780.0   Median :  5860000  
##  Mean   : 786.9   Mean   :  6083992  
##  3rd Qu.: 970.0   3rd Qu.:  7200000  
##  Max.   :1560.0   Max.   :150000000  
## 

Generar el Modelo

# Fit an ordinary least squares model of House_Price on the distance
# variables, the two area variables, parking type, city category and rainfall.
# Parking and City_Category are character columns, so they are wrapped in
# factor() to enter the model as categorical (dummy-coded) predictors.
# na.action = na.omit drops rows that have a missing value in any model
# variable instead of failing.
# NOTE(review): no rows are filtered before fitting, so any extreme values in
# `data` are part of the fit -- confirm this is intended.
# NOTE(review): Carpet and Builtup both measure area and are presumably
# strongly correlated; check collinearity before interpreting their individual
# coefficients.
regresion <- lm(
  House_Price ~ Dist_Taxi + Dist_Market + Dist_Hospital +
    Carpet + Builtup + factor(Parking) + factor(City_Category) + Rainfall,
  data = data,
  na.action = na.omit)
# Print the coefficient table, residual summary, R-squared and F-statistic.
summary(regresion)
## 
## Call:
## lm(formula = House_Price ~ Dist_Taxi + Dist_Market + Dist_Hospital + 
##     Carpet + Builtup + factor(Parking) + factor(City_Category) + 
##     Rainfall, data = data, na.action = na.omit)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3586934  -837542   -65314   784513  4577689 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  5.568e+06  3.688e+05  15.097  < 2e-16 ***
## Dist_Taxi                    2.834e+01  2.694e+01   1.052   0.2931    
## Dist_Market                  1.237e+01  2.089e+01   0.592   0.5538    
## Dist_Hospital                5.071e+01  3.021e+01   1.679   0.0936 .  
## Carpet                       9.907e+03  1.428e+02  69.398  < 2e-16 ***
## Builtup                     -7.575e+03  2.412e+02 -31.403  < 2e-16 ***
## factor(Parking)No Parking   -6.170e+05  1.393e+05  -4.429 1.06e-05 ***
## factor(Parking)Not Provided -5.077e+05  1.239e+05  -4.096 4.58e-05 ***
## factor(Parking)Open         -2.597e+05  1.131e+05  -2.297   0.0218 *  
## factor(City_Category)CAT B  -1.883e+06  9.641e+04 -19.529  < 2e-16 ***
## factor(City_Category)CAT C  -2.902e+06  1.062e+05 -27.321  < 2e-16 ***
## Rainfall                    -9.984e+01  1.548e+02  -0.645   0.5191    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1228000 on 886 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.9429, Adjusted R-squared:  0.9422 
## F-statistic:  1329 on 11 and 886 DF,  p-value: < 2.2e-16

Predicción

# Describe one hypothetical house as a single-row data frame. The column
# names match the predictors used in the model formula so that predict()
# can find them.
datos_nuevos <- data.frame(
  # Distance predictors
  Dist_Taxi = 9000, Dist_Market = 6000, Dist_Hospital = 12000,
  # Area predictors
  Carpet = 1600, Builtup = 1900,
  # Categorical predictors, passed as plain strings
  Parking = "Open", City_Category = "CAT B",
  Rainfall = 500
)
# Point estimate of House_Price for that house from the fitted model.
predict(object = regresion, newdata = datos_nuevos)
##       1 
## 5772055

Conclusión

Los resultados indican que el valor de una vivienda está determinado principalmente por su tamaño, la categoría de la ciudad en la que se ubica y la disponibilidad de estacionamiento. En contraste, factores como la cercanía a servicios (taxi, mercado, hospital) o las condiciones de lluvia no presentan un impacto estadísticamente significativo (al nivel del 5 %) en el precio dentro de este análisis. El modelo presenta un buen desempeño, ya que logra explicar la mayor parte de la variación observada en los precios de las viviendas (R² ajustado ≈ 0.94). Con base en la información analizada, se estima que una casa con las características definidas en la sección de predicción tendría un valor aproximado de 5,772,055. En este sentido, el modelo resulta útil tanto para identificar los factores clave que influyen en el precio de una vivienda como para realizar estimaciones de inmuebles con características similares.

LS0tDQp0aXRsZTogImhvdXNlIHByaWNpbmciDQphdXRob3I6ICJSb2JpZSINCmRhdGU6ICIyMDI2LTAyLTE3Ig0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUNCiAgICB0aGVtZTogY29zbW8NCi0tLQ0KDQojIFJlZ3Jlc2nDs24gTGluZWFsDQojIEltcG9ydGFyIGxhIGJhc2UgZGUgZGF0b3MgZGUgY3N2DQpgYGB7cn0NCmxpYnJhcnkocmVhZHIpDQpkYXRhIDwtIHJlYWRfY3N2KCJDOi9Vc2Vycy9yb2JpZS9Eb3dubG9hZHMvSG91c2VQcmljZURhdGEuY3N2IikNCmBgYA0KDQoNCiMgVXNhciBmaWxlLmNob29zZSgpDQojIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MNCmBgYHtyfQ0Kc3RyKGRhdGEpDQpzdW1tYXJ5KGRhdGEpDQpgYGANCg0KIyBHZW5lcmFyIGVsIE1vZGVsbw0KYGBge3J9DQpyZWdyZXNpb24gPC0gbG0oDQogIEhvdXNlX1ByaWNlIH4gRGlzdF9UYXhpICsgRGlzdF9NYXJrZXQgKyBEaXN0X0hvc3BpdGFsICsNCiAgICBDYXJwZXQgKyBCdWlsdHVwICsgZmFjdG9yKFBhcmtpbmcpICsgZmFjdG9yKENpdHlfQ2F0ZWdvcnkpICsgUmFpbmZhbGwsDQogIGRhdGEgPSBkYXRhLA0KICBuYS5hY3Rpb24gPSBuYS5vbWl0KQ0Kc3VtbWFyeShyZWdyZXNpb24pDQpgYGANCiANCiMgUHJlZGljY2nDs24NCmBgYHtyfQ0KZGF0b3NfbnVldm9zIDwtIGRhdGEuZnJhbWUoDQogIERpc3RfVGF4aSA9IDkwMDAsDQogIERpc3RfTWFya2V0ID0gNjAwMCwNCiAgRGlzdF9Ib3NwaXRhbCA9IDEyMDAwLA0KICBDYXJwZXQgPSAxNjAwLA0KICBCdWlsdHVwID0gMTkwMCwNCiAgUGFya2luZyA9ICJPcGVuIiwgICAgICAgICANCiAgQ2l0eV9DYXRlZ29yeSA9ICJDQVQgQiIsICAgDQogIFJhaW5mYWxsID0gNTAwDQopDQpwcmVkaWN0KHJlZ3Jlc2lvbiwgbmV3ZGF0YSA9IGRhdG9zX251ZXZvcykNCmBgYA0KDQojIENvbmNsdXNpw7NuDQoNClJlc3VsdGFkb3MgaW5kaWNhbiBxdWUgZWwgdmFsb3IgZGUgdW5hIHZpdmllbmRhIGVzdMOhIGRldGVybWluYWRvIHByaW5jaXBhbG1lbnRlIHBvciBzdSB0YW1hw7FvLCBsYSBjaXVkYWQgZW4gbGEgcXVlIHNlIHViaWNhIHkgbGEgZGlzcG9uaWJpbGlkYWQgZGUgZXN0YWNpb25hbWllbnRvLiBFbiBjb250cmFzdGUsIGZhY3RvcmVzIGNvbW8gbGEgY2VyY2Fuw61hIGEgc2VydmljaW9zIG8gbGFzIGNvbmRpY2lvbmVzIGRlIGxsdXZpYSBubyBwcmVzZW50YW4gdW4gaW1wYWN0byBzaWduaWZpY2F0aXZvIGVuIGVsIHByZWNpbyBkZW50cm8gZGUgZXN0ZSBhbsOhbGlzaXMuDQpFbCBtb2RlbG8gcHJlc2VudGEgdW4gYnVlbiBkZXNlbXBlw7FvLCB5YSBxdWUgbG9ncmEgZXhwbGljYXIgbGEgbWF5b3IgcGFydGUgZGUgbGEgdmFyaWFjacOzbiBvYnNlcnZhZGEgZW4gbG9zIHByZWNpb3MgZGUgbGFzIHZpdmllbmRhcy4gQ29uIGJhc2UgZW4gbGEgaW5mb3JtYWNpw7NuIGFuYWxpemFkYSwgc2UgZXN0aW1hIHF1ZSB1
bmEgY2FzYSBjb24gZGljaGFzIGNhcmFjdGVyw61zdGljYXMgdGVuZHLDrWEgdW4gdmFsb3IgYXByb3hpbWFkbyBkZSA1LDc3MiwwNTUuIEVuIGVzdGUgc2VudGlkbywgZWwgbW9kZWxvIHJlc3VsdGEgw7p0aWwgdGFudG8gcGFyYSBpZGVudGlmaWNhciBsb3MgZmFjdG9yZXMgY2xhdmUgcXVlIGluZmx1eWVuIGVuIGVsIHByZWNpbyBkZSB1bmEgdml2aWVuZGEgY29tbyBwYXJhIHJlYWxpemFyIGVzdGltYWNpb25lcyBkZSBpbm11ZWJsZXMgY29uIGNhcmFjdGVyw61zdGljYXMgc2ltaWxhcmVzLg0KDQoNCg==