# Lectura de la data
data <- readr::read_csv(file = "Precio de casas en Alemania.csv", locale = readr::locale(encoding = "UTF-8"))
# Descripcion de variables
skimr::skim(data)
Data summary
| Name |
data |
| Number of rows |
10552 |
| Number of columns |
26 |
| _______________________ |
|
| Column type frequency: |
|
| character |
13 |
| numeric |
13 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| Type |
402 |
0.96 |
5 |
20 |
0 |
11 |
0 |
| Free_of_Relation |
3571 |
0.66 |
1 |
47 |
0 |
704 |
0 |
| Furnishing_quality |
2726 |
0.74 |
5 |
7 |
0 |
4 |
0 |
| Condition |
323 |
0.97 |
6 |
36 |
0 |
10 |
0 |
| Heating |
584 |
0.94 |
9 |
20 |
0 |
13 |
0 |
| Energy_source |
1227 |
0.88 |
2 |
34 |
0 |
104 |
0 |
| Energy_certificate |
755 |
0.93 |
9 |
24 |
0 |
3 |
0 |
| Energy_certificate_type |
3526 |
0.67 |
18 |
23 |
0 |
2 |
0 |
| Energy_efficiency_class |
4819 |
0.54 |
1 |
2 |
0 |
9 |
0 |
| State |
1 |
1.00 |
6 |
22 |
0 |
16 |
0 |
| City |
1 |
1.00 |
3 |
43 |
0 |
534 |
0 |
| Place |
290 |
0.97 |
3 |
41 |
0 |
4762 |
0 |
| Garagetype |
1960 |
0.81 |
6 |
23 |
0 |
7 |
0 |
Variable type: numeric
| X1 |
0 |
1.00 |
5275.50 |
3046.24 |
0.0 |
2637.75 |
5275.50 |
7913.25 |
10551.00 |
▇▇▇▇▇ |
| Price |
0 |
1.00 |
556685.09 |
608741.00 |
0.0 |
250000.00 |
405215.00 |
655000.00 |
13000000.00 |
▇▁▁▁▁ |
| Living_space |
0 |
1.00 |
216.72 |
172.42 |
0.0 |
130.00 |
176.78 |
250.00 |
5600.00 |
▇▁▁▁▁ |
| Lot |
0 |
1.00 |
1491.66 |
8582.36 |
0.0 |
370.00 |
656.50 |
1047.00 |
547087.00 |
▇▁▁▁▁ |
| Usable_area |
4984 |
0.53 |
134.30 |
188.81 |
0.0 |
48.00 |
80.00 |
150.25 |
4034.00 |
▇▁▁▁▁ |
| Rooms |
0 |
1.00 |
7.39 |
5.38 |
1.0 |
5.00 |
6.00 |
8.00 |
170.00 |
▇▁▁▁▁ |
| Bedrooms |
3674 |
0.65 |
4.17 |
2.58 |
0.0 |
3.00 |
4.00 |
5.00 |
61.00 |
▇▁▁▁▁ |
| Bathrooms |
1801 |
0.83 |
2.31 |
1.74 |
0.0 |
1.00 |
2.00 |
3.00 |
44.00 |
▇▁▁▁▁ |
| Floors |
2664 |
0.75 |
2.28 |
0.82 |
0.0 |
2.00 |
2.00 |
3.00 |
13.00 |
▇▅▁▁▁ |
| Year_built |
694 |
0.93 |
1958.82 |
55.96 |
1300.0 |
1935.00 |
1971.00 |
1996.00 |
2022.00 |
▁▁▁▁▇ |
| Year_renovated |
5203 |
0.51 |
2010.71 |
10.55 |
1900.0 |
2006.00 |
2015.00 |
2018.00 |
2206.00 |
▁▇▁▁▁ |
| Energy_consumption |
8119 |
0.23 |
117.66 |
54.02 |
5.1 |
83.17 |
112.70 |
146.00 |
503.94 |
▆▇▁▁▁ |
| Garages |
1960 |
0.81 |
2.70 |
3.20 |
1.0 |
1.00 |
2.00 |
3.00 |
70.00 |
▇▁▁▁▁ |
# Seleccion de 4 variables cuantitativas y 1 cualtiativa
fil.data <-
dplyr::select(data, c(Price, Lot, Rooms, Garages, Energy_certificate)) %>%
tidyr::drop_na()
head(fil.data)
Regresion lineal multiple
RNGkind(sample.kind = "Rejection")
# Estimando el modelo de Regresion Lineal Multiple
lm1 <- lm(formula = Price ~ ., data = fil.data)
summary(lm1)
##
## Call:
## lm(formula = Price ~ ., data = fil.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2092101 -268776 -104793 104980 12339002
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 308435.208 12373.430 24.927
## Lot 9.085 1.324 6.862
## Rooms 31894.638 1562.013 20.419
## Garages 9479.250 2178.076 4.352
## Energy_certificateavailable for inspection -68903.095 18784.816 -3.668
## Energy_certificatenot required by law 25668.715 16479.333 1.558
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Lot 7.28e-12 ***
## Rooms < 2e-16 ***
## Garages 1.36e-05 ***
## Energy_certificateavailable for inspection 0.000246 ***
## Energy_certificatenot required by law 0.119360
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 543000 on 8100 degrees of freedom
## Multiple R-squared: 0.08255, Adjusted R-squared: 0.08199
## F-statistic: 145.8 on 5 and 8100 DF, p-value: < 2.2e-16
# Estimando el modelo de Regresion Lineal Multiple: Caret
set.seed(100)
lm2 <- caret::train(Price ~ ., data = fil.data, method = "lm")
lm2
## Linear Regression
##
## 8106 samples
## 4 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 8106, 8106, 8106, 8106, 8106, 8106, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 535613.2 0.08901707 308677.8
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Regresion Ridge
RNGkind(sample.kind = "Rejection")
x <- model.matrix(Price ~ ., data = fil.data)[ , -1]
y <- fil.data$Price
reg.ridge <- glmnet::glmnet(x = x, y = y, alpha = 0)
plot(reg.ridge, xvar = "lambda")

set.seed(100)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 0, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda
## [1] 15207.21
# Para hallar el modelo de regresion Ridge usando el valor optimo de lambda
reg.ridge.m1 <- glmnet::glmnet(x = x, y = y, alpha = 0, lambda = best.lambda)
coef(reg.ridge.m1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 314094.692753
## Lot 8.914559
## Rooms 30999.232798
## Garages 9755.702545
## Energy_certificateavailable for inspection -66710.495294
## Energy_certificatenot required by law 25856.509402
Regresion Lasso
reg.lasso <- glmnet::glmnet(x = x, y = y, alpha = 1)
plot(reg.lasso, xvar = "lambda")

# Para hallar el valor optimo de lambda se usa validacion cruzada
set.seed(200)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 1, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda
## [1] 572.5416
# Para hallar el modelo de regresion Lasso usando el valor optimo de lambda
reg.lasso.m1 <- glmnet::glmnet(x = x, y = y, alpha = 1, lambda = best.lambda)
coef(reg.lasso.m1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 309506.036377
## Lot 8.986592
## Rooms 31810.184626
## Garages 9357.858858
## Energy_certificateavailable for inspection -67254.261994
## Energy_certificatenot required by law 24417.336791
LS0tDQp0aXRsZTogIlRyYWJham8gMiINCmF1dGhvcjogIkNhcmxvIFZlZ2EiDQpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiDQpvdXRwdXQ6IA0KICAgIGh0bWxfZG9jdW1lbnQ6DQogICAgICAgIGRmX3ByaW50OiBwYWdlZA0KICAgICAgICB0b2M6IHRydWUNCiAgICAgICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgICAgICBjb2RlX2ZvbGRpbmc6ICJzaG93Ig0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGluY2x1ZGU9RkFMU0V9DQoNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShNQVNTKQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoSVNMUikNCmxpYnJhcnkoZ2xtbmV0KQ0KbGlicmFyeShza2ltcikNCg0KYGBgDQoNCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KIyBMZWN0dXJhIGRlIGxhIGRhdGENCmRhdGEgPC0gcmVhZHI6OnJlYWRfY3N2KGZpbGUgPSAiUHJlY2lvIGRlIGNhc2FzIGVuIEFsZW1hbmlhLmNzdiIsIGxvY2FsZSA9IHJlYWRyOjpsb2NhbGUoZW5jb2RpbmcgPSAiVVRGLTgiKSkNCg0KIyBEZXNjcmlwY2lvbiBkZSB2YXJpYWJsZXMNCnNraW1yOjpza2ltKGRhdGEpDQoNCiMgU2VsZWNjaW9uIGRlIDQgdmFyaWFibGVzIGN1YW50aXRhdGl2YXMgeSAxIGN1YWx0aWF0aXZhDQoNCmZpbC5kYXRhIDwtIA0KICBkcGx5cjo6c2VsZWN0KGRhdGEsIGMoUHJpY2UsIExvdCwgUm9vbXMsIEdhcmFnZXMsIEVuZXJneV9jZXJ0aWZpY2F0ZSkpICU+JSANCiAgdGlkeXI6OmRyb3BfbmEoKQ0KaGVhZChmaWwuZGF0YSkNCg0KYGBgDQoNCg0KIyMgUmVncmVzaW9uIGxpbmVhbCBtdWx0aXBsZQ0KDQpgYGB7ciB9DQoNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCg0KIyBFc3RpbWFuZG8gZWwgbW9kZWxvIGRlIFJlZ3Jlc2lvbiBMaW5lYWwgTXVsdGlwbGUNCg0KbG0xIDwtIGxtKGZvcm11bGEgPSBQcmljZSB+IC4sIGRhdGEgPSBmaWwuZGF0YSkNCnN1bW1hcnkobG0xKQ0KDQojIEVzdGltYW5kbyBlbCBtb2RlbG8gZGUgUmVncmVzaW9uIExpbmVhbCBNdWx0aXBsZTogQ2FyZXQNCg0Kc2V0LnNlZWQoMTAwKQ0KbG0yIDwtIGNhcmV0Ojp0cmFpbihQcmljZSB+IC4sIGRhdGEgPSBmaWwuZGF0YSwgbWV0aG9kID0gImxtIikNCmxtMg0KDQoNCmBgYA0KDQojIyBSZWdyZXNpb24gUmlkZ2UNCg0KYGBge3J9DQoNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCg0KeCA8LSBtb2RlbC5tYXRyaXgoUHJpY2UgfiAuLCBkYXRhID0gZmlsLmRhdGEpWyAsIC0xXQ0KeSA8LSBmaWwuZGF0YSRQcmljZQ0KDQoNCnJlZy5yaWRnZSA8LSBnbG1uZXQ6OmdsbW5ldCh4ID0geCwgeSA9IHksIGFscGhhID0gMCkNCnBsb3QocmVnLnJpZGdlLCB4dmFyID0gImxhbWJkYSIpDQoNCnNldC5zZWVkKDEwMCkNCmN2Lm91dCA8LSBnbG1uZXQ6OmN2LmdsbW5ldCh4ID0geCwgeSA9IHksIGFscGhhID0gMCwgdHlwZS5tZWFzdXJlID0gIm1zZSIpDQpiZXN0LmxhbWJkYSA8LSBjdi5vdXQkbGFtYmRhLm1pbjtiZXN0LmxhbWJkYQ0KDQojIFBhcmEgaGFsbGFyIGVsIG1vZGVsbyBkZSByZWdyZXNpb24gUmlkZ2UgdXNhbmRvIGVsIHZhbG9yIG9wdGltbyBkZSBsYW1iZGENCg0KcmVnLnJpZGdlLm0xIDwtIGdsbW5ldDo6Z2xtbmV0KHggPSB4LCB5ID0geSwgYWxwaGEgPSAwLCBsYW1iZGEgPSBiZXN0LmxhbWJkYSkNCmNvZWYocmVnLnJpZGdlLm0xKQ0KDQpgYGANCg0KIyMgUmVncmVzaW9uIExhc3NvDQoNCmBgYHtyfQ0KDQpyZWcubGFzc28gPC0gZ2xtbmV0OjpnbG1uZXQoeCA9IHgsIHkgPSB5LCBhbHBoYSA9IDEpDQpwbG90KHJlZy5sYXNzbywgeHZhciA9ICJsYW1iZGEiKQ0KDQojIFBhcmEgaGFsbGFyIGVsIHZhbG9yIG9wdGltbyBkZSBsYW1iZGEgc2UgdXNhIHZhbGlkYWNpb24gY3J1emFkYQ0KDQpzZXQuc2VlZCgyMDApDQpjdi5vdXQgPC0gZ2xtbmV0Ojpjdi5nbG1uZXQoeCA9IHgsIHkgPSB5LCBhbHBoYSA9IDEsIHR5cGUubWVhc3VyZSA9ICJtc2UiKQ0KYmVzdC5sYW1iZGEgPC0gY3Yub3V0JGxhbWJkYS5taW47YmVzdC5sYW1iZGENCg0KIyBQYXJhIGhhbGxhciBlbCBtb2RlbG8gZGUgcmVncmVzaW9uIExhc3NvIHVzYW5kbyBlbCB2YWxvciBvcHRpbW8gZGUgbGFtYmRhDQoNCnJlZy5sYXNzby5tMSA8LSBnbG1uZXQ6OmdsbW5ldCh4ID0geCwgeSA9IHksIGFscGhhID0gMSwgbGFtYmRhID0gYmVzdC5sYW1iZGEpDQpjb2VmKHJlZy5sYXNzby5tMSkNCg0KYGBgDQoNCg==