# Load the house-price data set into the environment
library(readr)
HousePriceData <- read_csv(
  file = "~/Conexión de interfaces/Conexión de interfaces/HousePriceData.csv"
)
## Rows: 905 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Parking, City_Category
## dbl (8): Observation, Dist_Taxi, Dist_Market, Dist_Hospital, Carpet, Builtup...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy so the raw import (HousePriceData) stays untouched.
# (To pick the CSV interactively instead, call file.choose() with no path.)
data <- HousePriceData
# Remove the outlier: the single most expensive house (about 150 million).
# which.max() gives the position of the first maximum and skips NAs, so exactly
# one row is dropped. A `House_Price < max` filter would also silently discard
# rows with a missing House_Price and any rows tied with the maximum.
outlier_row <- which.max(data$House_Price)
if (length(outlier_row) > 0) {
  # Guard: which.max() returns integer(0) for empty or all-NA input, and
  # data[-integer(0), ] would drop every row instead of none.
  data <- data[-outlier_row, ]
}
# Inspect the structure (column types and row count) after the removal.
str(data)
## tibble [904 × 10] (S3: tbl_df/tbl/data.frame)
## $ Observation : num [1:904] 1 2 3 4 5 6 7 8 9 10 ...
## $ Dist_Taxi : num [1:904] 9796 8294 11001 8301 10510 ...
## $ Dist_Market : num [1:904] 5250 8186 14399 11188 12629 ...
## $ Dist_Hospital: num [1:904] 10703 12694 16991 12289 13921 ...
## $ Carpet : num [1:904] 1659 1461 1340 1451 1770 ...
## $ Builtup : num [1:904] 1961 1752 1609 1748 2111 ...
## $ Parking : chr [1:904] "Open" "Not Provided" "Not Provided" "Covered" ...
## $ City_Category: chr [1:904] "CAT B" "CAT B" "CAT A" "CAT B" ...
## $ Rainfall : num [1:904] 530 210 720 620 450 760 1030 1020 680 1130 ...
## $ House_Price : num [1:904] 6649000 3982000 5401000 5373000 4662000 ...
# Per-column summary statistics of `data` (after the outlier removal).
# NOTE(review): the printed output reports 7 NA's in one column and a negative
# Rainfall minimum (-110) — confirm whether these are data-entry errors.
summary(data)
## Observation Dist_Taxi Dist_Market Dist_Hospital Carpet
## Min. : 1.0 Min. : 146 Min. : 1666 Min. : 3227 Min. : 775
## 1st Qu.:236.8 1st Qu.: 6476 1st Qu.: 9366 1st Qu.:11302 1st Qu.:1317
## Median :469.5 Median : 8224 Median :11143 Median :13188 Median :1477
## Mean :468.5 Mean : 8222 Mean :11011 Mean :13079 Mean :1486
## 3rd Qu.:700.2 3rd Qu.: 9936 3rd Qu.:12668 3rd Qu.:14851 3rd Qu.:1653
## Max. :932.0 Max. :16850 Max. :18281 Max. :22407 Max. :2229
## NA's :7
## Builtup Parking City_Category Rainfall
## Min. : 932 Length:904 Length:904 Min. :-110.0
## 1st Qu.:1578 Class :character Class :character 1st Qu.: 600.0
## Median :1774 Mode :character Mode :character Median : 780.0
## Mean :1782 Mean : 786.5
## 3rd Qu.:1983 3rd Qu.: 970.0
## Max. :2667 Max. :1560.0
##
## House_Price
## Min. : 1492000
## 1st Qu.: 4622750
## Median : 5857000
## Mean : 5924793
## 3rd Qu.: 7187250
## Max. :11632000
##
# Linear regression of House_Price on Carpet and Builtup plus the categorical
# predictors Parking and City_Category, which factor() turns into dummy terms.
regresión <- lm(
  House_Price ~ Carpet + Builtup + factor(Parking) + factor(City_Category),
  data = data
)
# Coefficient table, residual summary and goodness-of-fit statistics.
summary(regresión)
##
## Call:
## lm(formula = House_Price ~ Carpet + Builtup + factor(Parking) +
## factor(City_Category), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3381492 -818649 -53067 775650 4251046
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6493979.8 270908.2 23.971 < 2e-16 ***
## Carpet -582.7 3509.3 -0.166 0.868149
## Builtup 1170.2 2928.3 0.400 0.689532
## factor(Parking)No Parking -532427.0 139320.5 -3.822 0.000142 ***
## factor(Parking)Not Provided -442211.0 124560.1 -3.550 0.000405 ***
## factor(Parking)Open -240405.4 113849.8 -2.112 0.034999 *
## factor(City_Category)CAT B -1912181.8 96594.3 -19.796 < 2e-16 ***
## factor(City_Category)CAT C -2902000.6 106900.6 -27.147 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1238000 on 889 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.4865, Adjusted R-squared: 0.4825
## F-statistic: 120.3 on 7 and 889 DF, p-value: < 2.2e-16
# Single hypothetical house to score. Parking and City_Category are built as
# factors carrying every level observed in `data`.
datos_nuevos <- data.frame(
  Carpet = 1477,
  Builtup = 1774,
  Parking = factor("Open", levels = levels(factor(data$Parking))),
  City_Category = factor("CAT A", levels = levels(factor(data$City_Category)))
)
# Predicted House_Price for that house under the fitted model.
predict(regresión, newdata = datos_nuevos)
## 1
## 7468835
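Es un modelo altamente significativo en conjunto (estadístico F con p < 2.2e-16), pero con un poder explicativo moderado: el R² ajustado es de 0.4825, es decir, explica cerca del 48 % de la variación del precio.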
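El Parking influye: frente a la categoría de referencia (Covered), las otras tres categorías (No Parking, Not Provided y Open) tienen coeficientes negativos y significativos, es decir, un precio esperado menor.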
La categoría de ciudad pesa muchísimo: respecto a CAT A, CAT B reduce el precio esperado en cerca de 1.9 millones y CAT C en cerca de 2.9 millones.