Importar la base de datos de csv

# Load readr and read the house-price CSV into the environment as
# `HousePriceData`. The path is hard-coded under the user's home directory
# (`~`), so it only resolves on a machine with this folder layout.
library(readr)
HousePriceData <- read_csv("~/Conexión de interfaces/Conexión de interfaces/HousePriceData.csv")
## Rows: 905 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Parking, City_Category
## dbl (8): Observation, Dist_Taxi, Dist_Market, Dist_Hospital, Carpet, Builtup...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# file.choose("~/Conexión de interfaces/Conexión de interfaces/rentadebicis.csv")
# Work on `data` so the imported table `HousePriceData` stays as read.
data <- HousePriceData
# Remove the outlier (described by the author as the 150-million house):
# keep only the rows priced strictly below the observed maximum. Rows whose
# price is missing are dropped too, because which() skips NA comparisons.
max_price <- max(data[["House_Price"]], na.rm = TRUE)
data <- data[which(data[["House_Price"]] < max_price), ]

Entender la base de datos

# Show the structure of the cleaned table: column names, types, first values.
str(data)
## tibble [904 × 10] (S3: tbl_df/tbl/data.frame)
##  $ Observation  : num [1:904] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Dist_Taxi    : num [1:904] 9796 8294 11001 8301 10510 ...
##  $ Dist_Market  : num [1:904] 5250 8186 14399 11188 12629 ...
##  $ Dist_Hospital: num [1:904] 10703 12694 16991 12289 13921 ...
##  $ Carpet       : num [1:904] 1659 1461 1340 1451 1770 ...
##  $ Builtup      : num [1:904] 1961 1752 1609 1748 2111 ...
##  $ Parking      : chr [1:904] "Open" "Not Provided" "Not Provided" "Covered" ...
##  $ City_Category: chr [1:904] "CAT B" "CAT B" "CAT A" "CAT B" ...
##  $ Rainfall     : num [1:904] 530 210 720 620 450 760 1030 1020 680 1130 ...
##  $ House_Price  : num [1:904] 6649000 3982000 5401000 5373000 4662000 ...
# Per-column summary: quartiles, mean and NA counts for the numeric columns.
summary(data)
##   Observation      Dist_Taxi      Dist_Market    Dist_Hospital       Carpet    
##  Min.   :  1.0   Min.   :  146   Min.   : 1666   Min.   : 3227   Min.   : 775  
##  1st Qu.:236.8   1st Qu.: 6476   1st Qu.: 9366   1st Qu.:11302   1st Qu.:1317  
##  Median :469.5   Median : 8224   Median :11143   Median :13188   Median :1477  
##  Mean   :468.5   Mean   : 8222   Mean   :11011   Mean   :13079   Mean   :1486  
##  3rd Qu.:700.2   3rd Qu.: 9936   3rd Qu.:12668   3rd Qu.:14851   3rd Qu.:1653  
##  Max.   :932.0   Max.   :16850   Max.   :18281   Max.   :22407   Max.   :2229  
##                                                                  NA's   :7     
##     Builtup       Parking          City_Category         Rainfall     
##  Min.   : 932   Length:904         Length:904         Min.   :-110.0  
##  1st Qu.:1578   Class :character   Class :character   1st Qu.: 600.0  
##  Median :1774   Mode  :character   Mode  :character   Median : 780.0  
##  Mean   :1782                                         Mean   : 786.5  
##  3rd Qu.:1983                                         3rd Qu.: 970.0  
##  Max.   :2667                                         Max.   :1560.0  
##                                                                       
##   House_Price      
##  Min.   : 1492000  
##  1st Qu.: 4622750  
##  Median : 5857000  
##  Mean   : 5924793  
##  3rd Qu.: 7187250  
##  Max.   :11632000  
## 

Generar el Modelo

# Fit a linear regression of House_Price on the numeric predictors Carpet and
# Builtup plus the categorical predictors Parking and City_Category, which are
# wrapped in factor() so lm() creates one dummy coefficient per non-reference
# level.
regresión <- lm(House_Price ~ Carpet + Builtup + factor(Parking) + factor(City_Category), data = data)
# Print the residual summary, coefficient table and overall fit statistics.
summary(regresión)
## 
## Call:
## lm(formula = House_Price ~ Carpet + Builtup + factor(Parking) + 
##     factor(City_Category), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3381492  -818649   -53067   775650  4251046 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  6493979.8   270908.2  23.971  < 2e-16 ***
## Carpet                          -582.7     3509.3  -0.166 0.868149    
## Builtup                         1170.2     2928.3   0.400 0.689532    
## factor(Parking)No Parking    -532427.0   139320.5  -3.822 0.000142 ***
## factor(Parking)Not Provided  -442211.0   124560.1  -3.550 0.000405 ***
## factor(Parking)Open          -240405.4   113849.8  -2.112 0.034999 *  
## factor(City_Category)CAT B  -1912181.8    96594.3 -19.796  < 2e-16 ***
## factor(City_Category)CAT C  -2902000.6   106900.6 -27.147  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1238000 on 889 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.4865, Adjusted R-squared:  0.4825 
## F-statistic: 120.3 on 7 and 889 DF,  p-value: < 2.2e-16

Pronósticos

# Describe the single house to price as a one-row data frame.
datos_nuevos <- data.frame(
  Carpet = 1477,
  Builtup = 1774,
  Parking = "Open",
  City_Category = "CAT A"
)
# Turn the categorical columns into factors that carry the same level sets
# as the corresponding columns of the training data.
datos_nuevos$Parking <- factor(
  datos_nuevos$Parking,
  levels = levels(factor(data$Parking))
)
datos_nuevos$City_Category <- factor(
  datos_nuevos$City_Category,
  levels = levels(factor(data$City_Category))
)
# Predicted House_Price for that row under the fitted model.
predict(regresión, newdata = datos_nuevos)
##       1 
## 7468835

Conclusiones

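Es un modelo altamente significativo en conjunto (estadístico F con p < 2.2e-16), pero con un poder explicativo moderado: el R² ajustado es de 0.4825, es decir, explica cerca del 48 % de la variación del precio.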

El tipo de Parking influye: frente a la categoría de referencia (Covered), las categorías No Parking, Not Provided y Open se asocian con un precio menor, todas con p < 0.05.

La categoría de ciudad pesa muchísimo: frente a la referencia (CAT A), CAT B y CAT C reducen fuertemente el precio estimado (alrededor de 1.9 y 2.9 millones, respectivamente).

LS0tCnRpdGxlOiAiUmVncmVzacOzbiBMaW5lYWwgLSBIb3VzZSBQcmljaW5nIgphdXRob3I6ICJKZXPDunMgR2VyYXJkbyBTb2xhbm8gRMOtYXogQTAwMjI4MTU1IgpkYXRlOiAiMjAyNi0wMi0xNyIKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiBUUlVFCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIHRoZW1lOiBjb3NtbwotLS0KCiMgSW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcyBkZSBjc3YKYGBge3J9CiMgQ2FyZ2FyIGFsIGVudmlyb25tZW50CmxpYnJhcnkocmVhZHIpCkhvdXNlUHJpY2VEYXRhIDwtIHJlYWRfY3N2KCJ+L0NvbmV4acOzbiBkZSBpbnRlcmZhY2VzL0NvbmV4acOzbiBkZSBpbnRlcmZhY2VzL0hvdXNlUHJpY2VEYXRhLmNzdiIpCiMgZmlsZS5jaG9vc2UoIn4vQ29uZXhpw7NuIGRlIGludGVyZmFjZXMvQ29uZXhpw7NuIGRlIGludGVyZmFjZXMvcmVudGFkZWJpY2lzLmNzdiIpCmRhdGEgPC0gSG91c2VQcmljZURhdGEKIyBRdWl0YXIgZWwgdmFsb3IgYXTDrXBpY28sIGxhIGNhc2EgZGUgMTUwIG1pbGxvbmVzCm1heF9wcmljZSA8LSBtYXgoZGF0YSRIb3VzZV9QcmljZSwgbmEucm0gPSBUUlVFKQpkYXRhIDwtIHN1YnNldChkYXRhLCBIb3VzZV9QcmljZSA8IG1heF9wcmljZSkKYGBgCiMgRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvcwpgYGB7cn0Kc3RyKGRhdGEpCnN1bW1hcnkoZGF0YSkKYGBgCgojIEdlbmVyYXIgZWwgTW9kZWxvCmBgYHtyfQpyZWdyZXNpw7NuIDwtIGxtKEhvdXNlX1ByaWNlIH4gQ2FycGV0ICsgQnVpbHR1cCArIGZhY3RvcihQYXJraW5nKSArIGZhY3RvcihDaXR5X0NhdGVnb3J5KSwgZGF0YSA9IGRhdGEpCnN1bW1hcnkocmVncmVzacOzbikKYGBgCgojIFByb27Ds3N0aWNvcwpgYGB7cn0KZGF0b3NfbnVldm9zIDwtIHdpdGhpbihkYXRhLmZyYW1lKENhcnBldD0xNDc3LCBCdWlsdHVwPTE3NzQsIFBhcmtpbmc9Ik9wZW4iLCBDaXR5X0NhdGVnb3J5PSJDQVQgQSIpLCB7IFBhcmtpbmcgPC0gZmFjdG9yKFBhcmtpbmcsIGxldmVscz1sZXZlbHMoZmFjdG9yKGRhdGEkUGFya2luZykpKTsgQ2l0eV9DYXRlZ29yeSA8LSBmYWN0b3IoQ2l0eV9DYXRlZ29yeSwgbGV2ZWxzPWxldmVscyhmYWN0b3IoZGF0YSRDaXR5X0NhdGVnb3J5KSkpIH0pCnByZWRpY3QocmVncmVzacOzbiwgbmV3ZGF0YSA9IGRhdG9zX251ZXZvcykKYGBgCgojIENvbmNsdXNpb25lcwoKRXMgdW4gbW9kZWxvIGFsdGFtZW50ZSBzaWduaWZpY2F0aXZvIHkgY29uIHBvZGVyIGV4cGxpY2F0aXZvIG11eSBhbHRvLCBjb24gdW4gUsKyIGFqdXN0YWRvIGRlIDAuOTQwNy4KCkVsIFBhcmtpbmcgaW5mbHV5ZSwgeWEgcXVlIHZhcmlhcyBjYXRlZ29yw61hcyBzYWxlbiBjb24gcHJlY2lvIG1lbm9yIGNvbnRyYSBsYSByZWZlcmVuY2lhLgoKTGEgY2F0ZWdvcsOtYSBkZSBjaXVkYWQgcGVzYSBtdWNow61zaW1vLCB5YSBxdWUgQ0FUIEIgeSBDQVQgQyByZWR1Y2VuIGZ1ZXJ0ZSBlbCBwcmVjaW8u
Cgo=