# Lectura de la data
data <- readr::read_csv(file = "Precio de casas en Alemania.csv", locale = readr::locale(encoding = "UTF-8"))

# Descripcion de variables
skimr::skim(data)
Data summary
Name data
Number of rows 10552
Number of columns 26
_______________________
Column type frequency:
character 13
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Type 402 0.96 5 20 0 11 0
Free_of_Relation 3571 0.66 1 47 0 704 0
Furnishing_quality 2726 0.74 5 7 0 4 0
Condition 323 0.97 6 36 0 10 0
Heating 584 0.94 9 20 0 13 0
Energy_source 1227 0.88 2 34 0 104 0
Energy_certificate 755 0.93 9 24 0 3 0
Energy_certificate_type 3526 0.67 18 23 0 2 0
Energy_efficiency_class 4819 0.54 1 2 0 9 0
State 1 1.00 6 22 0 16 0
City 1 1.00 3 43 0 534 0
Place 290 0.97 3 41 0 4762 0
Garagetype 1960 0.81 6 23 0 7 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
X1 0 1.00 5275.50 3046.24 0.0 2637.75 5275.50 7913.25 10551.00 ▇▇▇▇▇
Price 0 1.00 556685.09 608741.00 0.0 250000.00 405215.00 655000.00 13000000.00 ▇▁▁▁▁
Living_space 0 1.00 216.72 172.42 0.0 130.00 176.78 250.00 5600.00 ▇▁▁▁▁
Lot 0 1.00 1491.66 8582.36 0.0 370.00 656.50 1047.00 547087.00 ▇▁▁▁▁
Usable_area 4984 0.53 134.30 188.81 0.0 48.00 80.00 150.25 4034.00 ▇▁▁▁▁
Rooms 0 1.00 7.39 5.38 1.0 5.00 6.00 8.00 170.00 ▇▁▁▁▁
Bedrooms 3674 0.65 4.17 2.58 0.0 3.00 4.00 5.00 61.00 ▇▁▁▁▁
Bathrooms 1801 0.83 2.31 1.74 0.0 1.00 2.00 3.00 44.00 ▇▁▁▁▁
Floors 2664 0.75 2.28 0.82 0.0 2.00 2.00 3.00 13.00 ▇▅▁▁▁
Year_built 694 0.93 1958.82 55.96 1300.0 1935.00 1971.00 1996.00 2022.00 ▁▁▁▁▇
Year_renovated 5203 0.51 2010.71 10.55 1900.0 2006.00 2015.00 2018.00 2206.00 ▁▇▁▁▁
Energy_consumption 8119 0.23 117.66 54.02 5.1 83.17 112.70 146.00 503.94 ▆▇▁▁▁
Garages 1960 0.81 2.70 3.20 1.0 1.00 2.00 3.00 70.00 ▇▁▁▁▁
# Seleccion de 4 variables cuantitativas y 1 cualtiativa

fil.data <- 
  dplyr::select(data, c(Price, Lot, Rooms, Garages, Energy_certificate)) %>% 
  tidyr::drop_na()
head(fil.data)

Regresion lineal multiple

RNGkind(sample.kind = "Rejection")

# Estimando el modelo de Regresion Lineal Multiple

lm1 <- lm(formula = Price ~ ., data = fil.data)
summary(lm1)
## 
## Call:
## lm(formula = Price ~ ., data = fil.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2092101  -268776  -104793   104980 12339002 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                308435.208  12373.430  24.927
## Lot                                             9.085      1.324   6.862
## Rooms                                       31894.638   1562.013  20.419
## Garages                                      9479.250   2178.076   4.352
## Energy_certificateavailable for inspection -68903.095  18784.816  -3.668
## Energy_certificatenot required by law       25668.715  16479.333   1.558
##                                            Pr(>|t|)    
## (Intercept)                                 < 2e-16 ***
## Lot                                        7.28e-12 ***
## Rooms                                       < 2e-16 ***
## Garages                                    1.36e-05 ***
## Energy_certificateavailable for inspection 0.000246 ***
## Energy_certificatenot required by law      0.119360    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 543000 on 8100 degrees of freedom
## Multiple R-squared:  0.08255,    Adjusted R-squared:  0.08199 
## F-statistic: 145.8 on 5 and 8100 DF,  p-value: < 2.2e-16
# Estimando el modelo de Regresion Lineal Multiple: Caret

set.seed(100)
lm2 <- caret::train(Price ~ ., data = fil.data, method = "lm")
lm2
## Linear Regression 
## 
## 8106 samples
##    4 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 8106, 8106, 8106, 8106, 8106, 8106, ... 
## Resampling results:
## 
##   RMSE      Rsquared    MAE     
##   535613.2  0.08901707  308677.8
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

Regresion Ridge

RNGkind(sample.kind = "Rejection")

x <- model.matrix(Price ~ ., data = fil.data)[ , -1]
y <- fil.data$Price


reg.ridge <- glmnet::glmnet(x = x, y = y, alpha = 0)
plot(reg.ridge, xvar = "lambda")

set.seed(100)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 0, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda
## [1] 15207.21
# Para hallar el modelo de regresion Ridge usando el valor optimo de lambda

reg.ridge.m1 <- glmnet::glmnet(x = x, y = y, alpha = 0, lambda = best.lambda)
coef(reg.ridge.m1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
##                                                       s0
## (Intercept)                                314094.692753
## Lot                                             8.914559
## Rooms                                       30999.232798
## Garages                                      9755.702545
## Energy_certificateavailable for inspection -66710.495294
## Energy_certificatenot required by law       25856.509402

Regresion Lasso

reg.lasso <- glmnet::glmnet(x = x, y = y, alpha = 1)
plot(reg.lasso, xvar = "lambda")

# Para hallar el valor optimo de lambda se usa validacion cruzada

set.seed(200)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 1, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda
## [1] 572.5416
# Para hallar el modelo de regresion Lasso usando el valor optimo de lambda

reg.lasso.m1 <- glmnet::glmnet(x = x, y = y, alpha = 1, lambda = best.lambda)
coef(reg.lasso.m1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
##                                                       s0
## (Intercept)                                309506.036377
## Lot                                             8.986592
## Rooms                                       31810.184626
## Garages                                      9357.858858
## Energy_certificateavailable for inspection -67254.261994
## Energy_certificatenot required by law       24417.336791
LS0tDQp0aXRsZTogIlRyYWJham8gMiINCmF1dGhvcjogIkNhcmxvIFZlZ2EiDQpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiDQpvdXRwdXQ6IA0KICAgIGh0bWxfZG9jdW1lbnQ6DQogICAgICAgIGRmX3ByaW50OiBwYWdlZA0KICAgICAgICB0b2M6IHRydWUNCiAgICAgICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgICAgICBjb2RlX2ZvbGRpbmc6ICJzaG93Ig0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGluY2x1ZGU9RkFMU0V9DQoNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShNQVNTKQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoSVNMUikNCmxpYnJhcnkoZ2xtbmV0KQ0KbGlicmFyeShza2ltcikNCg0KYGBgDQoNCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KIyBMZWN0dXJhIGRlIGxhIGRhdGENCmRhdGEgPC0gcmVhZHI6OnJlYWRfY3N2KGZpbGUgPSAiUHJlY2lvIGRlIGNhc2FzIGVuIEFsZW1hbmlhLmNzdiIsIGxvY2FsZSA9IHJlYWRyOjpsb2NhbGUoZW5jb2RpbmcgPSAiVVRGLTgiKSkNCg0KIyBEZXNjcmlwY2lvbiBkZSB2YXJpYWJsZXMNCnNraW1yOjpza2ltKGRhdGEpDQoNCiMgU2VsZWNjaW9uIGRlIDQgdmFyaWFibGVzIGN1YW50aXRhdGl2YXMgeSAxIGN1YWx0aWF0aXZhDQoNCmZpbC5kYXRhIDwtIA0KICBkcGx5cjo6c2VsZWN0KGRhdGEsIGMoUHJpY2UsIExvdCwgUm9vbXMsIEdhcmFnZXMsIEVuZXJneV9jZXJ0aWZpY2F0ZSkpICU+JSANCiAgdGlkeXI6OmRyb3BfbmEoKQ0KaGVhZChmaWwuZGF0YSkNCg0KYGBgDQoNCg0KIyMgUmVncmVzaW9uIGxpbmVhbCBtdWx0aXBsZQ0KDQpgYGB7ciB9DQoNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCg0KIyBFc3RpbWFuZG8gZWwgbW9kZWxvIGRlIFJlZ3Jlc2lvbiBMaW5lYWwgTXVsdGlwbGUNCg0KbG0xIDwtIGxtKGZvcm11bGEgPSBQcmljZSB+IC4sIGRhdGEgPSBmaWwuZGF0YSkNCnN1bW1hcnkobG0xKQ0KDQojIEVzdGltYW5kbyBlbCBtb2RlbG8gZGUgUmVncmVzaW9uIExpbmVhbCBNdWx0aXBsZTogQ2FyZXQNCg0Kc2V0LnNlZWQoMTAwKQ0KbG0yIDwtIGNhcmV0Ojp0cmFpbihQcmljZSB+IC4sIGRhdGEgPSBmaWwuZGF0YSwgbWV0aG9kID0gImxtIikNCmxtMg0KDQoNCmBgYA0KDQojIyBSZWdyZXNpb24gUmlkZ2UNCg0KYGBge3J9DQoNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCg0KeCA8LSBtb2RlbC5tYXRyaXgoUHJpY2UgfiAuLCBkYXRhID0gZmlsLmRhdGEpWyAsIC0xXQ0KeSA8LSBmaWwuZGF0YSRQcmljZQ0KDQoNCnJlZy5yaWRnZSA8LSBnbG1uZXQ6OmdsbW5ldCh4ID0geCwgeSA9IHksIGFscGhhID0gMCkNCnBsb3QocmVnLnJpZGdlLCB4dmFyID0gImxhbWJkYSIpDQoNCnNldC5zZWVkKDEwMCkNCmN2Lm91dCA8LSBnbG1uZXQ6OmN2LmdsbW5ldCh4ID0geCwgeSA9IHksIGFscGhhID0gMCwgdHlwZS5tZWFzdXJlID0gIm1zZSIpDQpiZXN0LmxhbWJkYSA8LSBjdi5vdXQkbGFtYmRhLm1pbjtiZXN0LmxhbWJkYQ0KDQojIFBhcmEgaGFsbGFyIGVsIG1vZGVsbyBkZSByZWdyZXNpb24gUmlkZ2UgdXNhbmRvIGVsIHZhbG9yIG9wdGltbyBkZSBsYW1iZGENCg0KcmVnLnJpZGdlLm0xIDwtIGdsbW5ldDo6Z2xtbmV0KHggPSB4LCB5ID0geSwgYWxwaGEgPSAwLCBsYW1iZGEgPSBiZXN0LmxhbWJkYSkNCmNvZWYocmVnLnJpZGdlLm0xKQ0KDQpgYGANCg0KIyMgUmVncmVzaW9uIExhc3NvDQoNCmBgYHtyfQ0KDQpyZWcubGFzc28gPC0gZ2xtbmV0OjpnbG1uZXQoeCA9IHgsIHkgPSB5LCBhbHBoYSA9IDEpDQpwbG90KHJlZy5sYXNzbywgeHZhciA9ICJsYW1iZGEiKQ0KDQojIFBhcmEgaGFsbGFyIGVsIHZhbG9yIG9wdGltbyBkZSBsYW1iZGEgc2UgdXNhIHZhbGlkYWNpb24gY3J1emFkYQ0KDQpzZXQuc2VlZCgyMDApDQpjdi5vdXQgPC0gZ2xtbmV0Ojpjdi5nbG1uZXQoeCA9IHgsIHkgPSB5LCBhbHBoYSA9IDEsIHR5cGUubWVhc3VyZSA9ICJtc2UiKQ0KYmVzdC5sYW1iZGEgPC0gY3Yub3V0JGxhbWJkYS5taW47YmVzdC5sYW1iZGENCg0KIyBQYXJhIGhhbGxhciBlbCBtb2RlbG8gZGUgcmVncmVzaW9uIExhc3NvIHVzYW5kbyBlbCB2YWxvciBvcHRpbW8gZGUgbGFtYmRhDQoNCnJlZy5sYXNzby5tMSA8LSBnbG1uZXQ6OmdsbW5ldCh4ID0geCwgeSA9IHksIGFscGhhID0gMSwgbGFtYmRhID0gYmVzdC5sYW1iZGEpDQpjb2VmKHJlZy5sYXNzby5tMSkNCg0KYGBgDQoNCg==