REGRESIÓN LINEAL
# Load the house-price dataset from a CSV file into a data frame.
# NOTE(review): the path is absolute and specific to one machine; the script
# will not run elsewhere unless the file exists at this exact location.
data <- read.csv(file = "/Users/nataliamartinez/Desktop/HousePriceData.csv")

# Inspect the structure: column names, storage types and leading values.
str(object = data)
## 'data.frame': 905 obs. of 10 variables:
## $ Observation : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Dist_Taxi : int 9796 8294 11001 8301 10510 6665 13153 5882 7495 8233 ...
## $ Dist_Market : int 5250 8186 14399 11188 12629 5142 11869 9948 11589 7067 ...
## $ Dist_Hospital: int 10703 12694 16991 12289 13921 9972 17811 13315 13370 11400 ...
## $ Carpet : int 1659 1461 1340 1451 1770 1442 1542 1261 1090 1030 ...
## $ Builtup : int 1961 1752 1609 1748 2111 1733 1858 1507 1321 1235 ...
## $ Parking : chr "Open" "Not Provided" "Not Provided" "Covered" ...
## $ City_Category: chr "CAT B" "CAT B" "CAT A" "CAT B" ...
## $ Rainfall : int 530 210 720 620 450 760 1030 1020 680 1130 ...
## $ House_Price : int 6649000 3982000 5401000 5373000 4662000 4526000 7224000 3772000 4631000 4415000 ...
# Per-column summary of the raw data (quartiles/mean for numeric columns,
# length/class for character columns, NA counts where present).
# NOTE(review): the output below shows values worth validating before
# modelling: a negative Rainfall minimum (-110), very large maxima for Carpet
# (24300), Builtup (12730) and House_Price (150000000), and 7 NA's in Carpet.
summary(data)
## Observation Dist_Taxi Dist_Market Dist_Hospital
## Min. : 1.0 Min. : 146 Min. : 1666 Min. : 3227
## 1st Qu.:237.0 1st Qu.: 6477 1st Qu.: 9367 1st Qu.:11302
## Median :469.0 Median : 8228 Median :11149 Median :13189
## Mean :468.4 Mean : 8235 Mean :11022 Mean :13091
## 3rd Qu.:700.0 3rd Qu.: 9939 3rd Qu.:12675 3rd Qu.:14855
## Max. :932.0 Max. :20662 Max. :20945 Max. :23294
##
## Carpet Builtup Parking City_Category
## Min. : 775 Min. : 932 Length:905 Length:905
## 1st Qu.: 1317 1st Qu.: 1579 Class :character Class :character
## Median : 1478 Median : 1774 Mode :character Mode :character
## Mean : 1511 Mean : 1794
## 3rd Qu.: 1654 3rd Qu.: 1985
## Max. :24300 Max. :12730
## NA's :7
## Rainfall House_Price
## Min. :-110.0 Min. : 1492000
## 1st Qu.: 600.0 1st Qu.: 4623000
## Median : 780.0 Median : 5860000
## Mean : 786.9 Mean : 6083992
## 3rd Qu.: 970.0 3rd Qu.: 7200000
## Max. :1560.0 Max. :150000000
##
# Convert the categorical predictors to factors before fitting, so that the
# same level sets can be reused when building new rows for prediction.
data[c("Parking", "City_Category")] <- lapply(
  data[c("Parking", "City_Category")],
  as.factor
)

# Multiple linear regression of House_Price on every other predictor
# (the Observation id column is intentionally left out of the formula).
# Rows with missing values are dropped by lm()'s default NA handling; the
# summary output reports how many observations were removed.
modelo <- lm(
  formula = House_Price ~ Dist_Taxi + Dist_Market + Dist_Hospital +
    Carpet + Builtup + Parking + City_Category + Rainfall,
  data = data
)

# Coefficient table, residual quantiles, R-squared and overall F-test.
summary(modelo)
##
## Call:
## lm(formula = House_Price ~ Dist_Taxi + Dist_Market + Dist_Hospital +
## Carpet + Builtup + Parking + City_Category + Rainfall, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3586934 -837542 -65314 784513 4577689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.568e+06 3.688e+05 15.097 < 2e-16 ***
## Dist_Taxi 2.834e+01 2.694e+01 1.052 0.2931
## Dist_Market 1.237e+01 2.089e+01 0.592 0.5538
## Dist_Hospital 5.071e+01 3.021e+01 1.679 0.0936 .
## Carpet 9.907e+03 1.428e+02 69.398 < 2e-16 ***
## Builtup -7.575e+03 2.412e+02 -31.403 < 2e-16 ***
## ParkingNo Parking -6.170e+05 1.393e+05 -4.429 1.06e-05 ***
## ParkingNot Provided -5.077e+05 1.239e+05 -4.096 4.58e-05 ***
## ParkingOpen -2.597e+05 1.131e+05 -2.297 0.0218 *
## City_CategoryCAT B -1.883e+06 9.641e+04 -19.529 < 2e-16 ***
## City_CategoryCAT C -2.902e+06 1.062e+05 -27.321 < 2e-16 ***
## Rainfall -9.984e+01 1.548e+02 -0.645 0.5191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1228000 on 886 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.9429, Adjusted R-squared: 0.9422
## F-statistic: 1329 on 11 and 886 DF, p-value: < 2.2e-16
Pronóstico 1
# House 1: a single-row data frame whose predictor values match the first
# observation shown in the str() output of the training data.
datos_nuevos <- data.frame(
  Dist_Taxi = 9796,
  Dist_Market = 5250,
  Dist_Hospital = 10703,
  Carpet = 1659,
  Builtup = 1961,
  Parking = "Open",
  City_Category = "CAT B",
  Rainfall = 530
)

# Re-encode the categorical values as factors carrying the full level sets of
# the training data, so predict() maps them onto the fitted dummy variables.
datos_nuevos$Parking <- factor(
  datos_nuevos$Parking,
  levels = levels(data$Parking)
)
datos_nuevos$City_Category <- factor(
  datos_nuevos$City_Category,
  levels = levels(data$City_Category)
)

# Point prediction of House_Price for this row.
predict(modelo, newdata = datos_nuevos)
## 1
## 5838997
El modelo estima que la casa #1 tendría un precio de 5,838,997.
Comparado con el precio real de 6,649,000, el modelo la subestimó en 810,003 (aproximadamente 12 %).
Pronóstico 2
# New house: a single-row data frame with hand-chosen predictor values for a
# property that is not taken from the training data.
datos_nuevos <- data.frame(
  Dist_Taxi = 7000,
  Dist_Market = 8000,
  Dist_Hospital = 10000,
  Carpet = 1800,
  Builtup = 2100,
  Parking = "Covered",
  City_Category = "CAT A",
  Rainfall = 700
)

# Re-encode the categorical values as factors carrying the full level sets of
# the training data, so predict() maps them onto the fitted dummy variables.
datos_nuevos$Parking <- factor(
  datos_nuevos$Parking,
  levels = levels(data$Parking)
)
datos_nuevos$City_Category <- factor(
  datos_nuevos$City_Category,
  levels = levels(data$City_Category)
)

# Point prediction of House_Price for this row.
predict(modelo, newdata = datos_nuevos)
## 1
## 8227645
El modelo estima que una casa con esas características tendría un precio aproximado de 8,227,645.
LS0tCnRpdGxlOiAiUmVncmVzacOzbiBMaW5lYWwgY2FzYSIKYXV0aG9yOiAiTmF0YWxpYSBTb2bDrWEgTWFydMOtbmV6IERvbcOtbmd1ZXoiCmRhdGU6ICIyMDI2LTAyLTE3IgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgIHRoZW1lOiBjb3NtbwotLS0KCiMgKipSRUdSRVNJw5NOIExJTkVBTCoqCgpgYGB7ciB9CgojIEltcG9ydGFyIGJhc2UgZGUgZGF0b3MKICBkYXRhIDwtIHJlYWQuY3N2KCIvVXNlcnMvbmF0YWxpYW1hcnRpbmV6L0Rlc2t0b3AvSG91c2VQcmljZURhdGEuY3N2IikKCiMgRXhwbG9yYXIgZGF0b3MKc3RyKGRhdGEpCnN1bW1hcnkoZGF0YSkKCiNBc2VndXJhIHF1ZSBsYXMgY2F0ZWdvcsOtYXMgc2VhbiBmYWN0b3JlcyAoYW50ZXMgZGUgcHJlZGVjaXIpCmRhdGEkUGFya2luZyA8LSBhcy5mYWN0b3IoZGF0YSRQYXJraW5nKQpkYXRhJENpdHlfQ2F0ZWdvcnkgPC0gYXMuZmFjdG9yKGRhdGEkQ2l0eV9DYXRlZ29yeSkKCiNNb2RlbG8gZGUgcmVncmVzacOzbgptb2RlbG8gPC0gbG0oCiAgSG91c2VfUHJpY2UgfiBEaXN0X1RheGkgKyBEaXN0X01hcmtldCArIERpc3RfSG9zcGl0YWwgKwogICAgQ2FycGV0ICsgQnVpbHR1cCArIFBhcmtpbmcgKyBDaXR5X0NhdGVnb3J5ICsgUmFpbmZhbGwsCiAgZGF0YSA9IGRhdGEKKQoKc3VtbWFyeShtb2RlbG8pCmBgYAoKIyAqKlByb27Ds3N0aWNvIDEqKgoKYGBge3IgfQojQ2FzYSAxCmRhdG9zX251ZXZvcyA8LSBkYXRhLmZyYW1lKAogIERpc3RfVGF4aSA9IDk3OTYsCiAgRGlzdF9NYXJrZXQgPSA1MjUwLAogIERpc3RfSG9zcGl0YWwgPSAxMDcwMywKICBDYXJwZXQgPSAxNjU5LAogIEJ1aWx0dXAgPSAxOTYxLAogIFBhcmtpbmcgPSBmYWN0b3IoIk9wZW4iLCBsZXZlbHMgPSBsZXZlbHMoZGF0YSRQYXJraW5nKSksCiAgQ2l0eV9DYXRlZ29yeSA9IGZhY3RvcigiQ0FUIEIiLCBsZXZlbHMgPSBsZXZlbHMoZGF0YSRDaXR5X0NhdGVnb3J5KSksCiAgUmFpbmZhbGwgPSA1MzAKKQoKcHJlZGljdChtb2RlbG8sIGRhdG9zX251ZXZvcykKYGBgCgojIyMjIyMgRWwgbW9kZWxvIGVzdMOhIGRpY2llbmRvIHF1ZSBsYSBjYXNhICMxIHRlbmRyw61hIHVuIHByZWNpbyBlc3RpbWFkbyBkZSA1LDgzOCw5OTcKCiMjIyMjIyBDb21wYXJhZG8gY29uIGVsIHByZWNpbyByZWFsIGRlIDYsNjQ5LDAwMAoKIyMjIyMjIEVsIG1vZGVsbyBsYSBlc3RpbcOzIHVuIHBvY28gbcOhcyBiYWphCgojICoqUHJvbsOzc3RpY28gMioqCgpgYGB7ciB9CiNDYXNhIG51ZXZhCmRhdG9zX251ZXZvcyA8LSBkYXRhLmZyYW1lKAogIERpc3RfVGF4aSA9IDcwMDAsCiAgRGlzdF9NYXJrZXQgPSA4MDAwLAogIERpc3RfSG9zcGl0YWwgPSAxMDAwMCwKICBDYXJwZXQgPSAxODAwLAogIEJ1aWx0dXAgPSAyMTAwLAogIFBhcmtpbmcgPSBmYWN0b3IoIkNvdmVyZWQiLCBsZXZlbHM9
bGV2ZWxzKGRhdGEkUGFya2luZykpLAogIENpdHlfQ2F0ZWdvcnkgPSBmYWN0b3IoIkNBVCBBIiwgbGV2ZWxzPWxldmVscyhkYXRhJENpdHlfQ2F0ZWdvcnkpKSwKICBSYWluZmFsbCA9IDcwMAopCnByZWRpY3QobW9kZWxvLCBkYXRvc19udWV2b3MpCmBgYAoKIyMjIyMjIEVsIG1vZGVsbyBlc3RpbWEgcXVlIHVuYSBjYXNhIGNvbiBlc2FzIGNhcmFjdGVyw61zdGljYXMgdGVuZHLDrWEgdW4gcHJlY2lvIGFwcm94aW1hZG8gZGUgOCwyMjcsNjQ1Cg==