Importar la base de datos

# Load the house-price dataset from its CSV file into the data frame `df`.
# NOTE(review): absolute, machine-specific Windows path; the script only
# runs where this exact file location exists.
df <- read.csv(
  file = "C:\\Users\\karla\\Desktop\\CONCENTRACION\\Modulo_progra\\HousePriceData.csv"
)

Entender la base de datos

# Print the compact structure of df: dimensions, column names and types.
str(object = df)
## 'data.frame':    905 obs. of  10 variables:
##  $ Observation  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Dist_Taxi    : int  9796 8294 11001 8301 10510 6665 13153 5882 7495 8233 ...
##  $ Dist_Market  : int  5250 8186 14399 11188 12629 5142 11869 9948 11589 7067 ...
##  $ Dist_Hospital: int  10703 12694 16991 12289 13921 9972 17811 13315 13370 11400 ...
##  $ Carpet       : int  1659 1461 1340 1451 1770 1442 1542 1261 1090 1030 ...
##  $ Builtup      : int  1961 1752 1609 1748 2111 1733 1858 1507 1321 1235 ...
##  $ Parking      : chr  "Open" "Not Provided" "Not Provided" "Covered" ...
##  $ City_Category: chr  "CAT B" "CAT B" "CAT A" "CAT B" ...
##  $ Rainfall     : int  530 210 720 620 450 760 1030 1020 680 1130 ...
##  $ House_Price  : int  6649000 3982000 5401000 5373000 4662000 4526000 7224000 3772000 4631000 4415000 ...
# Print per-column summary statistics (quantiles, mean, NA counts) for df.
summary(object = df)
##   Observation      Dist_Taxi      Dist_Market    Dist_Hospital  
##  Min.   :  1.0   Min.   :  146   Min.   : 1666   Min.   : 3227  
##  1st Qu.:237.0   1st Qu.: 6477   1st Qu.: 9367   1st Qu.:11302  
##  Median :469.0   Median : 8228   Median :11149   Median :13189  
##  Mean   :468.4   Mean   : 8235   Mean   :11022   Mean   :13091  
##  3rd Qu.:700.0   3rd Qu.: 9939   3rd Qu.:12675   3rd Qu.:14855  
##  Max.   :932.0   Max.   :20662   Max.   :20945   Max.   :23294  
##                                                                 
##      Carpet         Builtup        Parking          City_Category     
##  Min.   :  775   Min.   :  932   Length:905         Length:905        
##  1st Qu.: 1317   1st Qu.: 1579   Class :character   Class :character  
##  Median : 1478   Median : 1774   Mode  :character   Mode  :character  
##  Mean   : 1511   Mean   : 1794                                        
##  3rd Qu.: 1654   3rd Qu.: 1985                                        
##  Max.   :24300   Max.   :12730                                        
##  NA's   :7                                                            
##     Rainfall       House_Price       
##  Min.   :-110.0   Min.   :  1492000  
##  1st Qu.: 600.0   1st Qu.:  4623000  
##  Median : 780.0   Median :  5860000  
##  Mean   : 786.9   Mean   :  6083992  
##  3rd Qu.: 970.0   3rd Qu.:  7200000  
##  Max.   :1560.0   Max.   :150000000  
## 

Limpiar outliers

# Draw a boxplot of the sale prices to inspect them visually for outliers.
boxplot(x = df[["House_Price"]])

# Row positions whose House_Price is among the values that boxplot.stats()
# reports in its `out` component.
which(
  is.element(
    df[["House_Price"]],
    boxplot.stats(df[["House_Price"]])[["out"]]
  )
)
## [1] 348 659
# Show the full records at rows 348 and 659 (the two flagged positions).
df[c(348L, 659L), , drop = FALSE]
##     Observation Dist_Taxi Dist_Market Dist_Hospital Carpet Builtup    Parking
## 348         361     20662       20945         23294  24300   12730    Covered
## 659         679      7288        9560         12531   1989    2414 No Parking
##     City_Category Rainfall House_Price
## 348         CAT B     1130   150000000
## 659         CAT A      860    11632000
# Build df_clean by dropping the single most extreme price record.
# The row is located with which.max() instead of the hard-coded index 348,
# so the step keeps working if the rows of df are reordered or re-read.
# On the data shown in this report the maximum price (150,000,000) is
# row 348, so the result is the same as df[-348, ].
df_clean <- df[-which.max(df$House_Price), ]

Modelo

# Fit a linear model of House_Price on the listed predictors.
# Fix: the model is now fit on df_clean (the 150,000,000 price outlier
# removed) instead of the raw df, so the outlier cleaning step actually
# reaches the regression.
# NOTE(review): the printed summary and prediction that follow in this
# report were produced with data = df; re-knit to regenerate them.
# NOTE(review): Observation looks like a row identifier rather than a
# property of the house; confirm that it belongs in the model.
regresion <- lm(
  House_Price ~ Observation +
    Dist_Hospital + Carpet + Builtup +
    factor(Parking) + factor(City_Category),
  data = df_clean
)

# Print the coefficient table and fit statistics of the fitted model.
summary(object = regresion)
## 
## Call:
## lm(formula = House_Price ~ Observation + Dist_Hospital + Carpet + 
##     Builtup + factor(Parking) + factor(City_Category), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3655901  -815219   -56150   787929  4464813 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  5.247e+06  3.459e+05  15.168  < 2e-16 ***
## Observation                  4.059e+02  1.522e+02   2.666   0.0078 ** 
## Dist_Hospital                7.921e+01  1.607e+01   4.928 9.90e-07 ***
## Carpet                       9.905e+03  1.414e+02  70.058  < 2e-16 ***
## Builtup                     -7.552e+03  2.397e+02 -31.506  < 2e-16 ***
## factor(Parking)No Parking   -6.109e+05  1.385e+05  -4.411 1.15e-05 ***
## factor(Parking)Not Provided -4.909e+05  1.233e+05  -3.980 7.44e-05 ***
## factor(Parking)Open         -2.566e+05  1.126e+05  -2.279   0.0229 *  
## factor(City_Category)CAT B  -1.875e+06  9.573e+04 -19.588  < 2e-16 ***
## factor(City_Category)CAT C  -2.894e+06  1.056e+05 -27.394  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1223000 on 888 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.9432, Adjusted R-squared:  0.9426 
## F-statistic:  1639 on 9 and 888 DF,  p-value: < 2.2e-16

Generar pronósticos

# One-row data frame describing the house to predict. Column names match
# the predictor names used in the model formula.
datos_nuevos <- data.frame(
  Observation   = 1,
  Dist_Hospital = 1000,
  Carpet        = 1500,
  Builtup       = 1800,
  Parking       = "Open",   # must be one of the Parking levels in the data
  City_Category = "CAT B",  # must be one of the City_Category levels
  stringsAsFactors = FALSE
)

# Predicted House_Price from the fitted model for the house in datos_nuevos.
predict(object = regresion, newdata = datos_nuevos)
##       1 
## 4458332
# Average sale price, used as a reference point for the prediction.
# Fix: computed on df_clean so the 150,000,000 outlier removed earlier
# does not inflate the mean.
# NOTE(review): the value printed below was computed on the raw df;
# re-knit to regenerate it.
mean(df_clean$House_Price)
## [1] 6083992
# Median sale price, used as a reference point for the prediction.
# Fix: computed on df_clean, the data set with the price outlier removed,
# instead of the raw df.
# NOTE(review): the value printed below was computed on the raw df;
# re-knit to regenerate it.
median(df_clean$House_Price)
## [1] 5860000

Conclusiones:

  1. Modelo altamente significativo con un poder explicativo del 94% (R² ajustado = 0.9426), lo que indica que las variables incluidas explican gran parte de la variabilidad del precio de las casas.
  2. El tamaño de la propiedad es el factor más determinante, especialmente la variable Carpet.
  3. La categoría de la ciudad influye significativamente en el valor de la vivienda.
  4. El tipo de estacionamiento impacta negativamente cuando no es óptimo, mostrando que la disponibilidad y calidad del parking afectan el valor final.
  5. La distancia al hospital tiene un efecto positivo moderado.

El modelo predice que una vivienda con 1500 m2 de área carpet, 1800 m2 construidos, ubicada en una ciudad categoría B, con estacionamiento abierto y a 1000 metros de un hospital, tiene un valor estimado de 4,458,332.

LS0tDQp0aXRsZTogIlJlZ3Jlc2nDs24gSG91c2UgUHJpY2luZyINCmF1dGhvcjogIkthcmxhIEzDs3BleiBBMDA4Mzk3OTAiDQpkYXRlOiAiMjAyNi0wMi0xNyINCm91dHB1dDogDQogIGh0bWxfZG9jdW1lbnQ6IA0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUNCiAgICB0aGVtZTogY29zbW8NCi0tLQ0KIyBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zDQpgYGB7cn0NCmRmIDwtIHJlYWQuY3N2KCJDOlxcVXNlcnNcXGthcmxhXFxEZXNrdG9wXFxDT05DRU5UUkFDSU9OXFxNb2R1bG9fcHJvZ3JhXFxIb3VzZVByaWNlRGF0YS5jc3YiKQ0KYGBgDQoNCiMgRW50ZW5kZXIgYmFzZXMgZGUgZGF0b3MNCmBgYHtyfQ0Kc3RyKGRmKQ0Kc3VtbWFyeShkZikNCmBgYA0KIyBMaW1waWFyIG91dGxpZXIgDQpgYGB7cn0NCg0KYm94cGxvdChkZiRIb3VzZV9QcmljZSkNCmBgYA0KYGBge3J9DQp3aGljaChkZiRIb3VzZV9QcmljZSAlaW4lIGJveHBsb3Quc3RhdHMoZGYkSG91c2VfUHJpY2UpJG91dCkNCg0KYGBgDQpgYGB7cn0NCmRmW2MoMzQ4LCA2NTkpLCBdDQoNCmBgYA0KYGBge3J9DQpkZl9jbGVhbiA8LSBkZlstMzQ4LCBdDQoNCmBgYA0KDQoNCiMgTW9kZWxvIA0KYGBge3J9DQpyZWdyZXNpb24gPC0gbG0oSG91c2VfUHJpY2UgfiBPYnNlcnZhdGlvbiArDQogICAgICAgICAgICAgICAgRGlzdF9Ib3NwaXRhbCArIENhcnBldCArIEJ1aWx0dXAgKyANCiAgICAgICAgICAgICAgICBmYWN0b3IoUGFya2luZykgKyBmYWN0b3IoQ2l0eV9DYXRlZ29yeSksIA0KICAgICAgICAgICAgICAgIGRhdGEgPSBkZikNCg0Kc3VtbWFyeShyZWdyZXNpb24pDQoNCmBgYA0KIyBHZW5lcmFyIHByb25vc3RpY29zIA0KYGBge3J9DQpkYXRvc19udWV2b3MgPC0gZGF0YS5mcmFtZSgNCiAgT2JzZXJ2YXRpb24gPSAxLA0KICBEaXN0X0hvc3BpdGFsID0gMTAwMCwNCiAgQ2FycGV0ID0gMTUwMCwNCiAgQnVpbHR1cCA9IDE4MDAsDQogIFBhcmtpbmcgPSAiT3BlbiIsDQogIENpdHlfQ2F0ZWdvcnkgPSAiQ0FUIEIiDQopDQoNCnByZWRpY3QocmVncmVzaW9uLCBkYXRvc19udWV2b3MpDQoNCmBgYA0KYGBge3J9DQptZWFuKGRmJEhvdXNlX1ByaWNlKQ0KbWVkaWFuKGRmJEhvdXNlX1ByaWNlKQ0KYGBgDQoNCiMgQ29uY2x1c2lvbmVzOiANCjEuIE1vZGVsbyBhbHRhbWVudGUgc2lnbmlmaWNhdGl2byBjb24gdW4gcG9kZXIgZXhwbGljYXRpdm8gZGVsIDk0JSAsIGxvIHF1ZSBpbmRpY2EgcXVlIGxhcyB2YXJpYWJsZXMgaW5jbHVpZGFzIGV4cGxpY2FuIGdyYW4gcGFydGUgZGUgbGEgdmFyaWFiaWxpZGFkIGRlbCBwcmVjaW8gZGUgbGFzIGNhc2FzLg0KMi4gRWwgdGFtYcOxbyBkZSBsYSBwcm9waWVkYWQgZXMgZWwgZmFjdG9yIG3DoXMgZGV0ZXJtaW5hbnRlLCBlc3BlY2lhbG1lbnRlIGxhIHZhcmlhYmxlIENhcnBldC4NCjMuIExhIGNhdGVnb3LDrWEgZGUgbGEgY2l1ZGFkIGluZmx1eWUgc2lnbmlmaWNhdGl2YW1lbnRl
IGVuIGVsIHZhbG9yIGRlIGxhIHZpdmllbmRhLiANCjQuIEVsIHRpcG8gZGUgZXN0YWNpb25hbWllbnRvIGltcGFjdGEgbmVnYXRpdmFtZW50ZSBjdWFuZG8gbm8gZXMgw7NwdGltbywgbW9zdHJhbmRvIHF1ZSBsYSBkaXNwb25pYmlsaWRhZCB5IGNhbGlkYWQgZGVsIHBhcmtpbmcgYWZlY3RhbiBlbCB2YWxvciBmaW5hbC4NCjUuIExhIGRpc3RhbmNpYSBhbCBob3NwaXRhbCB0aWVuZSB1biBlZmVjdG8gcG9zaXRpdm8gbW9kZXJhZG8uIA0KDQpFbCBtb2RlbG8gcHJlZGljZSBxdWUgdW5hIHZpdmllbmRhIGNvbiAxNTAwIGRlIMOhcmVhIGNhcnBldCBtMiwgMTgwMCBtMiBjb25zdHJ1aWRvcywgdWJpY2FkYSBlbiB1bmEgY2l1ZGFkIGNhdGVnb3LDrWEgQiwgY29uIGVzdGFjaW9uYW1pZW50byBhYmllcnRvIHkgYSAxMDAwIG1ldHJvcyBkZSB1biBob3NwaXRhbCwgdGllbmUgdW4gdmFsb3IgZXN0aW1hZG8gZGUgNCw0NTgsMzMyLg==