Paquetes:
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(sp)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(viridis)
## Loading required package: viridisLite
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: spatstat.geom
## spatstat.geom 2.4-0
## Loading required package: spatstat.random
## spatstat.random 2.2-0
## Loading required package: spatstat.core
## Loading required package: nlme
## Loading required package: rpart
## spatstat.core 2.4-4
## Loading required package: spatstat.linnet
## spatstat.linnet 2.3-2
##
## spatstat 2.3-4 (nickname: 'Watch this space')
## For an introduction to spatstat, type 'beginner'
library(leaflet)
library(rgdal)
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
##
## rgdal: version: 1.5-32, (SVN revision 1176)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.3.2, released 2021/09/01
## Path to GDAL shared files: C:/Users/matir/AppData/Local/R/win-library/4.2/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/matir/AppData/Local/R/win-library/4.2/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-7
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library(rgeos)
## rgeos version: 0.5-9, (SVN revision 684)
## GEOS runtime version: 3.9.1-CAPI-1.14.2
## Please note that rgeos will be retired by the end of 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
## GEOS using OverlayNG
## Linking to sp version: 1.4-7
## Polygon checking: TRUE
library(tmap)
library(tmaptools)
library(spgwr)
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(grid)
##
## Attaching package: 'grid'
## The following object is masked from 'package:spatstat.geom':
##
## as.mask
library(rvest)
library(httr)
library(XML)
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:spatstat.geom':
##
## shift
library(devtools)
## Loading required package: usethis
library(geosphere)
##
## Attaching package: 'geosphere'
## The following object is masked from 'package:spatstat.geom':
##
## perimeter
library(gridExtra)
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:nlme':
##
## getData
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
##
## guess_encoding
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:rgeos':
##
## intersect, setdiff, union
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ stringr 1.4.1
## ✔ tidyr 1.2.0 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::collapse() masks nlme::collapse()
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ tidyr::extract() masks raster::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ dplyr::select() masks raster::select()
## ✖ purrr::transpose() masks data.table::transpose()
Datos previos:
# Load the course datasets. The course folder is spelled once and every path
# is assembled with file.path(), which yields the same "/"-separated paths.
dir_curso <- "C:/Users/matir/OneDrive/Escritorio/Mati/Universidad/2022/Segundo Trimestre/Analitica Espacial"
dir_bases <- file.path(dir_curso, "bases")

data <- readRDS(file.path(dir_bases, "Propiedades_RM_sf.rds"))
data2 <- readRDS(file.path(dir_bases, "Permisos_2017.rds"))
viviendas <- readRDS(file.path(dir_bases, "viviendas_2017.rds"))
personas <- readRDS(file.path(dir_curso, "Clase 2", "personas", "personas_2017.rds"))
# zc_censo2017.rds used to be read twice in a row; a single read is enough.
censo_zc <- readRDS(file.path(dir_bases, "zc_censo2017.rds"))

# Commune codes used throughout the script (study commune and its neighbours).
comuna_indep <- c(13108)
comuna_renca <- c(13128)
comuna_conchali <- c(13104)
comuna_recoleta <- c(13127)
comuna_santiago_c <- c(13101)
# Tables restricted to the study commune (Independencia, code 13108).
# Each source table names its commune-code column differently.
ind_propiedades <- data[data$codigo_comuna == comuna_indep, ]
ind_permisos <- data2[data2$COD_COMUNA == comuna_indep, ]
ind_viviendas <- viviendas[viviendas$COMUNA == comuna_indep, ]
ind_personas <- personas[personas$COMUNA == comuna_indep, ]
ind_personas_censo_zc <- censo_zc[censo_zc$COMUNA == comuna_indep, ]
# Tables restricted to the four neighbouring communes:
# Renca (13128), Conchalí (13104), Recoleta (13127) and Santiago (13101).
#
# Membership must be tested with %in%. Comparing a column with `==` against a
# length-4 vector recycles the codes along the rows, so row i is compared with
# only one of the four codes; most matching rows are silently dropped and R
# warns that the longer object length is not a multiple of the shorter one.
comunas_vecinas <- c(13128, 13104, 13127, 13101)
c_v_propiedades <- data[data$codigo_comuna %in% comunas_vecinas, ]
c_v_permisos <- data2[data2$COD_COMUNA %in% comunas_vecinas, ]
c_v_viviendas <- viviendas[viviendas$COMUNA %in% comunas_vecinas, ]
c_v_personas <- personas[personas$COMUNA %in% comunas_vecinas, ]
c_v_personas_censo_zc <- censo_zc[censo_zc$COMUNA %in% comunas_vecinas, ]
# Build the zone identifier (ZONA) and the dwelling identifier (COD_VIV) by
# positional concatenation of the commune, district, area and ZC_LOC codes.
# The same recipe is applied to both person tables, so it lives in one helper.
agregar_codigos_zona <- function(tabla) {
  tabla %>%
    mutate(
      CODIGO_COMUNA = COMUNA * 10^6,
      DISTRITO_CENSAL = DC * 10^4,
      CODIGO_AREA = AREA * 10^3,
      ZONA = as.numeric(CODIGO_COMUNA + DISTRITO_CENSAL + CODIGO_AREA + ZC_LOC),
      COD_VIV = ZONA * 10^4 + NVIV
    )
}

# Table with the Independencia commune only:
ind_personas <- agregar_codigos_zona(ind_personas)
# Table with the other communes:
c_v_personas <- agregar_codigos_zona(c_v_personas)
# Independencia table: keep the columns used later, renaming them while
# selecting (new_name = old_name).
ind_personas2 <- ind_personas %>%
  dplyr::select(
    ZONA,
    EDAD = P09,
    NIVEL_EDU = P15,
    RAMA_TRABAJO = P18,
    ESCOLARIDAD,
    SEXO = P08,
    RES_5 = P11PAIS,
    RES_HAB = P10PAIS,
    PARENTEZCO = P07,
    L_NAC = P12,
    N_PERSONAS = PERSONAN,
    N_HOGAR = NHOGAR
  )

# Neighbouring-communes table: same columns plus the commune code.
c_v_personas2 <- c_v_personas %>%
  dplyr::select(
    COMUNA,
    ZONA,
    EDAD = P09,
    NIVEL_EDU = P15,
    RAMA_TRABAJO = P18,
    ESCOLARIDAD,
    SEXO = P08,
    RES_5 = P11PAIS,
    RES_HAB = P10PAIS,
    PARENTEZCO = P07,
    L_NAC = P12,
    N_PERSONAS = PERSONAN,
    N_HOGAR = NHOGAR
  )

# Commune name as a factor; any other non-missing code maps to "" and a
# missing code stays NA.
c_v_personas2$NOMBRE_COMUNA <- as.factor(dplyr::case_when(
  c_v_personas$COMUNA == 13101 ~ "Santiago",
  c_v_personas$COMUNA == 13104 ~ "Conchalí",
  c_v_personas$COMUNA == 13127 ~ "Recoleta",
  c_v_personas$COMUNA == 13128 ~ "Renca",
  !is.na(c_v_personas$COMUNA) ~ ""
))
A modo de tener información actual sobre las propiedades que están en venta en Independencia y sus valores, se realizó web scraping en cinco páginas inmobiliarias. Con base en ellas se compilaron las siguientes bases con información de casas y departamentos en Independencia:
# Web-scraped listing tables and their latitude/longitude companions.
rutas_ws <- c(
  deptos = "C:/Users/matir/OneDrive/Escritorio/Mati/Universidad/2022/Segundo Trimestre/Analitica Espacial/ws/ventas_depto_indep.csv",
  casas = "C:/Users/matir/OneDrive/Escritorio/Mati/Universidad/2022/Segundo Trimestre/Analitica Espacial/ws/ventas_casas_indep.csv",
  latlon_deptos = "C:/Users/matir/OneDrive/Escritorio/Mati/Universidad/2022/Segundo Trimestre/Analitica Espacial/ws/lat_lon_deptos (1).csv",
  latlon_casas = "C:/Users/matir/OneDrive/Escritorio/Mati/Universidad/2022/Segundo Trimestre/Analitica Espacial/ws/lat_lon_casas (1).csv"
)
data_depto_ws <- read.csv(rutas_ws[["deptos"]])
data_casas_ws <- read.csv(rutas_ws[["casas"]])
data_latlon_depto_ws <- read.csv(rutas_ws[["latlon_deptos"]])
data_latlon_casas_ws <- read.csv(rutas_ws[["latlon_casas"]])
Uniremos las bases con información de las casas y deptos con la latitud y longitud de las mismas a modo de poder geolocalizarlas posteriormente en el estudio:
# Attach coordinates to each listing by ID, then turn the result into an sf
# point layer in EPSG:4326 built from the Longitude/Latitude columns.
precios_depto <- data_depto_ws %>%
  left_join(data_latlon_depto_ws, by = "ID") %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, agr = "identity")

precios_casa <- data_casas_ws %>%
  left_join(data_latlon_casas_ws, by = "ID") %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, agr = "identity")
Finalmente, se debe eliminar la columna que posee datos repetidos, en este caso, la dirección de las propiedades:
# Drop the 8th column of each listings table, selected by position.
# NOTE(review): the surrounding text says this is the duplicated address
# column; the positional index is fragile — confirm column 8 is still the
# intended one if the CSV layout changes.
precios_casa <- precios_casa[,-8]
precios_depto <- precios_depto[,-8]
#PREGUNTA 1: Comparar, a nivel de zona censal, la evolución de los precios entre la BBDD subida a clases y el web scrapping realizado. La medida será la variación del valor UF/m2 para una propiedad tipo1.
Con la BBDD del profesor: Análisis de la variable UF/m2 de las comunas colindantes con Independencia:
# Mean UF/m2 per census zone for rows with d_proyecto == 0 and d_casa == 0
# (mapped below as "Departamentos") in Independencia and its four neighbouring
# communes, joined to the census-zone table so it can be mapped.
#
# The commune filter uses %in%: `==` against a length-5 vector recycles the
# codes row by row and keeps only the rows that happen to line up with their
# own code, discarding most of the data.
comunas_estudio <- c(13108, 13128, 13104, 13127, 13101)
ae4 <- data_1_deptos <- data %>%
  filter(d_proyecto == 0) %>%
  filter(d_casa == 0) %>%
  filter(codigo_comuna %in% comunas_estudio) %>%
  group_by(codigo_zona) %>%
  mutate(ufm2 = uf / m2_const) %>%
  summarise(uf_m2_promedio = mean(ufm2)) %>%
  dplyr::select(codigo_zona, uf_m2_promedio) %>%
  left_join(censo_zc, by = c("codigo_zona" = "COD_INE_15"))
ae4 <- st_sf(ae4)
# Choropleth of the zone means, in quantile classes.
tm_shape(ae4) +
  tm_fill("uf_m2_promedio", palette = "Reds", title = "", n = 5, style = "quantile") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom")) +
  tm_layout(main.title = "Precio Promedio por Zona para Departamentos", main.title.size = 0.9, main.title.position = "left")
# Label each zone with its commune name. Any other non-missing code maps to ""
# and a missing code stays NA.
ae4$NOMBRE_COMUNA <- as.factor(dplyr::case_when(
  ae4$COMUNA == 13108 ~ "Independencia",
  ae4$COMUNA == 13101 ~ "Santiago",
  ae4$COMUNA == 13104 ~ "Conchalí",
  ae4$COMUNA == 13127 ~ "Recoleta",
  ae4$COMUNA == 13128 ~ "Renca",
  !is.na(ae4$COMUNA) ~ ""
))
# Map of the zones coloured by commune.
tm_shape(ae4) +
  tm_polygons("NOMBRE_COMUNA")
# Commune-wide mean UF/m2 for rows with d_proyecto == 0 and d_casa == 0,
# one single-row summary per neighbouring commune.
ae4_conchali <- data %>%
  filter(codigo_comuna == 13104, d_proyecto == 0, d_casa == 0) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))
ae4_recoleta <- data %>%
  filter(codigo_comuna == 13127, d_proyecto == 0, d_casa == 0) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))
ae4_renca <- data %>%
  filter(codigo_comuna == 13128, d_proyecto == 0, d_casa == 0) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))
ae4_stgo <- data %>%
  filter(codigo_comuna == 13101, d_proyecto == 0, d_casa == 0) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))

# Report each mean rounded to two decimals.
print(paste("Precio promedio DEPTO UF/m2 comuna Conchali: ", round(mean(ae4_conchali$uf_m2_promedio), 2)))
## [1] "Precio promedio DEPTO UF/m2 comuna Conchali: 41.11"
print(paste("Precio promedio DEPTO UF/m2 comuna Recoleta: ", round(mean(ae4_recoleta$uf_m2_promedio), 2)))
## [1] "Precio promedio DEPTO UF/m2 comuna Recoleta: 57.83"
print(paste("Precio promedio DEPTO UF/m2 comuna Renca: ", round(mean(ae4_renca$uf_m2_promedio), 2)))
## [1] "Precio promedio DEPTO UF/m2 comuna Renca: 38.66"
print(paste("Precio promedio DEPTO UF/m2 comuna Santiago Centro: ", round(mean(ae4_stgo$uf_m2_promedio), 2)))
## [1] "Precio promedio DEPTO UF/m2 comuna Santiago Centro: 62.55"
# Mean UF/m2 per census zone for rows with d_proyecto == 0 and d_casa == 1
# (mapped below as "Casas") in Independencia and its four neighbouring
# communes, joined to the census-zone table so it can be mapped.
#
# The commune filter uses %in%: `==` against a length-5 vector recycles the
# codes row by row (R warned that the longer object length is not a multiple
# of the shorter one) and discards most matching rows.
comunas_estudio <- c(13108, 13128, 13104, 13127, 13101)
ae5 <- data_1_casas <- data %>%
  filter(d_proyecto == 0) %>%
  filter(d_casa == 1) %>%
  filter(codigo_comuna %in% comunas_estudio) %>%
  group_by(codigo_zona) %>%
  mutate(ufm2 = uf / m2_const) %>%
  summarise(uf_m2_promedio = mean(ufm2)) %>%
  dplyr::select(codigo_zona, uf_m2_promedio) %>%
  left_join(censo_zc, by = c("codigo_zona" = "COD_INE_15"))
ae5 <- st_sf(ae5)
# Choropleth of the zone means, in quantile classes.
tm_shape(ae5) +
  tm_fill("uf_m2_promedio", palette = "Blues", title = "", n = 5, style = "quantile") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom")) +
  tm_layout(main.title = "Precio Promedio por Zona para Casas", main.title.size = 0.9, main.title.position = "left")
# Label each zone with its commune name. Any other non-missing code maps to ""
# and a missing code stays NA.
ae5$NOMBRE_COMUNA <- as.factor(dplyr::case_when(
  ae5$COMUNA == 13108 ~ "Independencia",
  ae5$COMUNA == 13101 ~ "Santiago",
  ae5$COMUNA == 13104 ~ "Conchalí",
  ae5$COMUNA == 13127 ~ "Recoleta",
  ae5$COMUNA == 13128 ~ "Renca",
  !is.na(ae5$COMUNA) ~ ""
))
# Map of the zones coloured by commune.
tm_shape(ae5) +
  tm_polygons("NOMBRE_COMUNA")
# Commune-wide mean UF/m2 for rows with d_proyecto == 0 and d_casa == 1,
# one single-row summary per neighbouring commune.
ae5_conchali <- data %>%
  filter(codigo_comuna == 13104, d_proyecto == 0, d_casa == 1) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))
ae5_recoleta <- data %>%
  filter(codigo_comuna == 13127, d_proyecto == 0, d_casa == 1) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))
ae5_renca <- data %>%
  filter(codigo_comuna == 13128, d_proyecto == 0, d_casa == 1) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))
ae5_stgo <- data %>%
  filter(codigo_comuna == 13101, d_proyecto == 0, d_casa == 1) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const))

# Report each mean rounded to two decimals.
print(paste("Precio promedio CASA UF/m2 comuna Conchali: ", round(mean(ae5_conchali$uf_m2_promedio), 2)))
## [1] "Precio promedio CASA UF/m2 comuna Conchali: 39.46"
print(paste("Precio promedio CASA UF/m2 comuna Recoleta: ", round(mean(ae5_recoleta$uf_m2_promedio), 2)))
## [1] "Precio promedio CASA UF/m2 comuna Recoleta: 47.97"
print(paste("Precio promedio CASA UF/m2 comuna Renca: ", round(mean(ae5_renca$uf_m2_promedio), 2)))
## [1] "Precio promedio CASA UF/m2 comuna Renca: 38.27"
print(paste("Precio promedio CASA UF/m2 comuna Santiago Centro: ", round(mean(ae5_stgo$uf_m2_promedio), 2)))
## [1] "Precio promedio CASA UF/m2 comuna Santiago Centro: 47.59"
Ahora analizamos nuestra comuna de estudio:
# Independencia only: mean UF/m2 per census zone for d_casa == 0 rows
# (d_proyecto == 0), joined to the census-zone table and mapped.
mapita_ind_deptos <- data_1_deptos <- data %>%
  filter(d_proyecto == 0, d_casa == 0, codigo_comuna == comuna_indep) %>%
  group_by(codigo_zona) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const)) %>%
  dplyr::select(codigo_zona, uf_m2_promedio) %>%
  left_join(censo_zc, by = c("codigo_zona" = "COD_INE_15"))
mapita_ind_deptos <- st_sf(mapita_ind_deptos)
tm_shape(mapita_ind_deptos) +
  tm_fill("uf_m2_promedio", palette = "Reds", title = "", n = 5, style = "quantile") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom")) +
  tm_layout(main.title = "Precio Promedio por Zona para Departamentos", main.title.size = 0.9, main.title.position = "left")

# Same summary for d_casa == 1 rows.
# NOTE(review): this also overwrites data_1_deptos with the houses result;
# data_1_casas was probably intended — confirm before relying on either name.
mapita_ind_casas <- data_1_deptos <- data %>%
  filter(d_casa == 1, d_proyecto == 0, codigo_comuna == comuna_indep) %>%
  group_by(codigo_zona) %>%
  summarise(uf_m2_promedio = mean(uf / m2_const)) %>%
  dplyr::select(codigo_zona, uf_m2_promedio) %>%
  left_join(censo_zc, by = c("codigo_zona" = "COD_INE_15"))
mapita_ind_casas <- st_sf(mapita_ind_casas)
tm_shape(mapita_ind_casas) +
  tm_fill("uf_m2_promedio", palette = "Blues", title = "", n = 5, style = "quantile") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom")) +
  tm_layout(main.title = "Precio Promedio por Zona para Casas", main.title.size = 0.9, main.title.position = "left")
# Create the UF and UF/m2 variables on the scraped listings.
# UF value on 8 September 2022: 33,958.13 (the code divides by 33958).
valor_uf <- 33958

precios_depto <- precios_depto %>%
  mutate(
    uf = precio / valor_uf,
    ufm2 = uf / area_total
  ) %>%
  dplyr::filter(uf > 1000) # keep listings priced above 1000 UF

precios_casa <- precios_casa %>%
  mutate(
    uf = precio / valor_uf,
    ufm2 = uf / area_total
  ) %>%
  dplyr::filter(uf > 1000) # keep listings priced above 1000 UF
# Not referenced in the visible part of the script; kept as defined.
indep_conchali <- c(13104, 13108)

# Census zones of Independencia with AREA == 1, used as the base map.
ae_ae <- censo_zc %>%
  dplyr::filter(COMUNA == 13108, AREA == 1)
base <- ggplot(ae_ae) +
  geom_sf(fill = NA) +
  theme_bw()

# Apartments for sale over the base map.
base +
  geom_sf(data = precios_depto, color = alpha("blue", 0.8)) +
  ggtitle("Deptos en Venta Independencia 2022") +
  coord_sf(xlim = c(-70.645, -70.682), ylim = c(-33.40, -33.432))

# NOTE(review): is.na() on a geometry column may not flag empty geometries;
# sf::st_is_empty() may be what is intended — confirm.
precios_casa <- precios_casa %>%
  filter(!is.na(geometry))

# Houses for sale over the base map.
base +
  geom_sf(data = precios_casa, color = alpha("red", 0.8)) +
  ggtitle("Casas en Venta Independencia 2022") +
  coord_sf(xlim = c(-70.645, -70.682), ylim = c(-33.40, -33.432))

# Overall mean UF/m2 of the scraped listings.
print(paste("Precio promedio DEPTO UF/m2 comuna Independencia: ", round(mean(precios_depto$ufm2), 2)))
## [1] "Precio promedio DEPTO UF/m2 comuna Independencia: 53.84"
print(paste("Precio promedio CASA UF/m2 comuna Independencia: ", round(mean(precios_casa$ufm2), 2)))
## [1] "Precio promedio CASA UF/m2 comuna Independencia: 28.86"
# Recompute the per-zone mean UF/m2 for d_casa == 0 rows (d_proyecto == 0) in
# Independencia and its four neighbouring communes, and map it.
#
# The commune filter uses %in%: `==` against a length-5 vector recycles the
# codes row by row and discards most matching rows.
comunas_estudio <- c(13108, 13128, 13104, 13127, 13101)
ae4 <- data_1_deptos <- data %>%
  filter(d_proyecto == 0) %>%
  filter(d_casa == 0) %>%
  filter(codigo_comuna %in% comunas_estudio) %>%
  group_by(codigo_zona) %>%
  mutate(ufm2 = uf / m2_const) %>%
  summarise(uf_m2_promedio = mean(ufm2)) %>%
  dplyr::select(codigo_zona, uf_m2_promedio) %>%
  left_join(censo_zc, by = c("codigo_zona" = "COD_INE_15"))
ae4 <- st_sf(ae4)
tm_shape(ae4) +
  tm_fill("uf_m2_promedio", palette = "Reds", title = "", n = 5, style = "quantile") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom")) +
  tm_layout(main.title = "Precio Promedio por Zona para Departamentos", main.title.size = 0.9, main.title.position = "left")
#PREGUNTA 2: Unir y hacer un ESDA en la base de datos original a través de estudiar la localización a estaciones de metro, áreas verdes, educación y cualquier otro elemento que puedan considerar relevante en su comuna de estudio.
# Urban reference layers (shapefiles). They share one directory, so the
# directory is spelled once and each path is assembled with file.path().
dir_urbanas <- "C:/Users/matir/OneDrive/Escritorio/Mati/Universidad/2022/Segundo Trimestre/Analitica Espacial/bases/BBDD - Urbanas"
Plazas <- st_read(file.path(dir_urbanas, "Plazas.shp"))
## Reading layer `Plazas' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Plazas.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 22235 features and 16 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -73.79706 ymin: -53.18867 xmax: -68.89847 ymax: -18.42497
## Geodetic CRS: WGS 84
Parques <- st_read(file.path(dir_urbanas, "Parques.shp"))
## Reading layer `Parques' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Parques.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1856 features and 16 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -73.80183 ymin: -53.18216 xmax: -68.91595 ymax: -18.4272
## Geodetic CRS: WGS 84
Metro <- st_read(file.path(dir_urbanas, "SHP - Metro", "Metro.shp"))
## Reading layer `Metro' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\SHP - Metro\Metro.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 117 features and 10 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -70.75738 ymin: -33.60937 xmax: -70.54485 ymax: -33.36651
## Geodetic CRS: WGS 84
Salud <- st_read(file.path(dir_urbanas, "Salud.shp"))
## Reading layer `Salud' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Salud.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 848 features and 31 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -73.79259 ymin: -53.18052 xmax: -68.92335 ymax: -18.42689
## Geodetic CRS: WGS 84
Paraderos <- st_read(file.path(dir_urbanas, "Paraderos.shp"))
## Reading layer `Paraderos' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Paraderos.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 27076 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -73.79084 ymin: -53.18406 xmax: -68.90461 ymax: -18.42763
## Geodetic CRS: WGS 84
Edu <- st_read(file.path(dir_urbanas, "Ed_Inicial.shp"))
## Reading layer `Ed_Inicial' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Ed_Inicial.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2311 features and 44 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -73.79403 ymin: -53.18278 xmax: -68.92593 ymax: -18.42897
## Geodetic CRS: WGS 84
Colegios <- st_read(file.path(dir_urbanas, "Colegios.shp"))
## Reading layer `Colegios' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Colegios.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 4660 features and 13 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -78.82864 ymin: -54.9348 xmax: -67.60534 ymax: -18.19582
## Geodetic CRS: WGS 84
# The file on disk is "Concesion_Sanitaria.shp" (capital S, as the reader
# output below reports); the previous lower-case spelling only resolves on
# case-insensitive filesystems.
Concesion <- st_read(file.path(dir_urbanas, "Concesion_Sanitaria.shp"))
## Reading layer `Concesion_Sanitaria' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Concesion_Sanitaria.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 367 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -73.83751 ymin: -53.30726 xmax: -68.89493 ymax: -18.39995
## Geodetic CRS: WGS 84
Ciclovias <- st_read(file.path(dir_urbanas, "Ciclovias.shp"))
## Reading layer `Ciclovias' from data source
## `C:\Users\matir\OneDrive\Escritorio\Mati\Universidad\2022\Segundo Trimestre\Analitica Espacial\bases\BBDD - Urbanas\Ciclovias.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 878 features and 8 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: -73.25704 ymin: -53.18382 xmax: -68.90344 ymax: -18.42773
## Geodetic CRS: WGS 84
# PART A: ESDA FOR APARTMENTS — V. Location study: green areas
# GREEN-AREA DISTANCES, APARTMENTS

# Census zones of commune 13108 with AREA == 1: the study area.
ae <- censo_zc %>%
  dplyr::filter(COMUNA == "13108", AREA == 1)

# Empty outline of the study area, reused as the base of every map below.
grafo_base <- ggplot(ae) +
  geom_sf(fill = NA) +
  theme_bw()

# Keep only the features that intersect the study area (inner spatial join
# against the zones, carrying just their COMUNA column).
zonas_ae <- transmute(ae, COMUNA)
Precios <- st_join(precios_depto, zonas_ae, join = st_intersects, left = FALSE)
Plazas <- st_join(st_make_valid(Plazas), zonas_ae, join = st_intersects, left = FALSE)
Parques <- st_join(st_make_valid(Parques), zonas_ae, join = st_intersects, left = FALSE)

# Listings, squares and parks over the study area.
grafo_base +
  geom_sf(
    data = Precios, aes(fill = "Precios"), alpha = 0.5, color = "orange",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Plazas, color = alpha("green", 1), size = 1) +
  geom_sf(data = Parques, color = alpha("pink", 1), size = 1) +
  ggtitle("Distancias Areas Verdes Deptos Independencia") +
  scale_fill_manual(
    values = c(
      Precios = "orange",
      Plazas = "green",
      Parques = "pink"
    ),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Deptos en venta", "Plazas", "Parques"),
    name = "Leyenda:"
  ) +
  theme_void()
# Distance from every listing to its nearest square / park: st_distance()
# returns a listings x features matrix and the row-wise minimum keeps the
# closest feature.
deptos_plaza <- st_distance(Precios, Plazas)
deptos_parque <- st_distance(Precios, Parques)
Precios$dist_plaza <- apply(deptos_plaza, 1, min)
Precios$dist_parque <- apply(deptos_parque, 1, min)
# UF and UF/m2 per listing.
# The divisor is the same UF value (33958) used when uf/ufm2 are first built
# on precios_depto; the 33000 previously used here made these figures
# inconsistent with the averages reported earlier in the script.
Precios <- Precios %>%
  mutate(
    uf = precio / 33958,
    ufm2 = uf / area_total
  ) %>%
  dplyr::filter(uf > 1000)
# Price (UF) against surface, with a linear fit and its confidence band.
ggplot(Precios, aes(x = area_total, y = uf)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Superficie [m²]",
    y = "UF",
    title = "Valor Deptos en función de la superficie"
  )
## `geom_smooth()` using formula 'y ~ x'
# DISTANCE TO SQUARES
ggplot(Precios, aes(x = dist_plaza, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor Deptos en función de la distancia a plazas"
  )
## `geom_smooth()` using formula 'y ~ x'
# DISTANCE TO PARKS
ggplot(Precios, aes(x = dist_parque, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor Deptos en función de la distancia a parques"
  )
## `geom_smooth()` using formula 'y ~ x'
# TRANSPORT DISTANCES, APARTMENTS
# Restrict listings and transport layers to the study area.
# NOTE(review): Precios was already joined against these zones earlier;
# joining again presumably only adds another (suffixed) COMUNA column —
# confirm it is needed.
zonas_ae <- transmute(ae, COMUNA)
Precios <- st_join(Precios, zonas_ae, join = st_intersects, left = FALSE)
Metro <- st_join(st_make_valid(Metro), zonas_ae, join = st_intersects, left = FALSE)
Paraderos <- st_join(st_make_valid(Paraderos), zonas_ae, join = st_intersects, left = FALSE)
Ciclovias <- st_join(st_make_valid(Ciclovias), zonas_ae, join = st_intersects, left = FALSE)

# Listings alone over the study area.
grafo_base +
  geom_sf(data = Precios, color = alpha("orange", 0.5)) +
  ggtitle("Distancias Transportes Deptos Independencia")

# Listings together with metro stations, bus stops and cycle lanes.
grafo_base +
  geom_sf(
    data = Precios, aes(fill = "Precios"), alpha = 0.5, color = "orange",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Metro, color = alpha("red", 1), size = 1) +
  geom_sf(data = Paraderos, color = alpha("grey", 1), size = 1) +
  geom_sf(data = Ciclovias, color = alpha("cyan", 1), size = 1) +
  ggtitle("Distancias Transportes Deptos Independencia") +
  scale_fill_manual(
    values = c(
      Precios = "orange",
      Metro = "red",
      Paraderos = "grey",
      Ciclovias = "cyan"
    ),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Deptos en venta", "Metro", "Paraderos", "Ciclovias"),
    name = "Leyenda:"
  ) +
  theme_void()

# Distance from each listing to the nearest feature of every transport layer
# (row-wise minimum of the listings x features distance matrix).
deptos_metro <- st_distance(Precios, Metro)
deptos_paradero <- st_distance(Precios, Paraderos)
deptos_ciclovia <- st_distance(Precios, Ciclovias)
Precios$dist_metro <- apply(deptos_metro, 1, min)
Precios$dist_paradero <- apply(deptos_paradero, 1, min)
Precios$dist_ciclovia <- apply(deptos_ciclovia, 1, min)
# DISTANCE TO METRO: UF/m2 against distance, with a linear fit.
ggplot(Precios, aes(x = dist_metro, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a metro"
  )
## `geom_smooth()` using formula 'y ~ x'
# DISTANCE TO BUS STOPS
ggplot(Precios, aes(x = dist_paradero, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a paraderos"
  )
## `geom_smooth()` using formula 'y ~ x'
# DISTANCE TO CYCLE LANES
ggplot(Precios, aes(x = dist_ciclovia, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a ciclovias"
  )
## `geom_smooth()` using formula 'y ~ x'
# DISTANCE TO HEALTH FACILITIES
# Restrict listings and the health layer to the study area.
zonas_ae <- transmute(ae, COMUNA)
Precios <- st_join(Precios, zonas_ae, join = st_intersects, left = FALSE)
Salud <- st_join(st_make_valid(Salud), zonas_ae, join = st_intersects, left = FALSE)

# Listings and health facilities over the study area.
# NOTE(review): the second fill key is named "Metro" although this map shows
# Salud; it looks like a copy-paste leftover — confirm.
grafo_base +
  geom_sf(
    data = Precios, aes(fill = "Precios"), alpha = 0.5, color = "orange",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Salud, color = alpha("magenta", 1), size = 1) +
  ggtitle("Distancias Salud Deptos Independencia") +
  scale_fill_manual(
    values = c(
      Precios = "orange",
      Metro = "magenta"
    ),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Deptos en venta", "Salud"),
    name = "Leyenda:"
  ) +
  theme_void()

# Distance from each listing to its nearest health facility.
deptos_salud <- st_distance(Precios, Salud)
Precios$dist_salud <- apply(deptos_salud, 1, min)
# DISTANCE TO HEALTH FACILITIES: UF/m2 against distance, with a linear fit.
ggplot(Precios, aes(x = dist_salud, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor Deptos en función de la distancia a salud"
  )
## `geom_smooth()` using formula 'y ~ x'
# SANITARY CONCESSION (drinking water and sewage management)
# Restrict listings and concession polygons to the study area.
Precios <- Precios %>%
  st_join(transmute(ae, COMUNA), join = st_intersects, left = FALSE)
Concesion <- st_make_valid(Concesion) %>%
  st_join(transmute(ae, COMUNA), join = st_intersects, left = FALSE)

# Listings and concession areas over the study area.
grafo_base +
  geom_sf(
    data = Precios, aes(fill = "Precios"), alpha = 0.5, color = "orange",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Concesion, color = alpha("yellow", 1), size = 1) +
  ggtitle("Concesión sanitaria Independencia") +
  scale_fill_manual(
    values = c(
      Precios = "orange",
      Concesion = "yellow"
    ),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Deptos en venta", "Concesion"),
    name = "Leyenda:"
  ) +
  theme_void()

deptos_concesion <- st_distance(Precios, Concesion)
# Take the minimum over the concession distance matrix. This previously
# reused deptos_salud, which made dist_concesion a copy of dist_salud.
Precios$dist_concesion <- apply(deptos_concesion, 1, min)
# SANITARY CONCESSION: UF/m2 against distance, with a linear fit.
ggplot(data = Precios, aes(x = dist_concesion, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la concesión sanitaria"
  )
## `geom_smooth()` using formula 'y ~ x'
# Distance to education facilities, Independencia
# Restrict listings and both education layers to the study area.
zonas_ae <- transmute(ae, COMUNA)
Precios <- st_join(Precios, zonas_ae, join = st_intersects, left = FALSE)
Edu <- st_join(st_make_valid(Edu), zonas_ae, join = st_intersects, left = FALSE)
Colegios <- st_join(st_make_valid(Colegios), zonas_ae, join = st_intersects, left = FALSE)

# Listings, Ed_Inicial facilities (Edu) and schools (Colegios) over the
# study area.
grafo_base +
  geom_sf(
    data = Precios, aes(fill = "Precios"), alpha = 0.5, color = "orange",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Edu, color = alpha("yellow", 1), size = 1) +
  geom_sf(data = Colegios, color = alpha("black", 1), size = 1) +
  ggtitle("Distancias Deptos Educación Independencia") +
  scale_fill_manual(
    values = c(
      Precios = "orange",
      Edu = "yellow",
      Colegios = "black"
    ),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Deptos en venta", "Edu", "Colegios"),
    name = "Leyenda:"
  ) +
  theme_void()

# Distance from each listing to the nearest feature of each education layer.
deptos_edu <- st_distance(Precios, Edu)
deptos_colegios <- st_distance(Precios, Colegios)
Precios$dist_edu <- apply(deptos_edu, 1, min)
Precios$dist_colegios <- apply(deptos_colegios, 1, min)
# DISTANCE TO EDUCATION (Ed_Inicial layer)
ggplot(Precios, aes(x = dist_edu, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a educación inicial"
  )
## `geom_smooth()` using formula 'y ~ x'
# DISTANCE TO SCHOOLS
ggplot(Precios, aes(x = dist_colegios, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a colegios"
  )
## `geom_smooth()` using formula 'y ~ x'
# PART B: ESDA FOR HOUSES — V. Location study: green areas
# GREEN-AREA DISTANCES, HOUSES
# Restrict house listings, squares and parks to the study area.
# NOTE(review): Plazas and Parques were already joined against these zones
# in Part A; joining again presumably only adds a suffixed COMUNA column —
# confirm it is needed.
zonas_ae <- transmute(ae, COMUNA)
Precios1 <- st_join(precios_casa, zonas_ae, join = st_intersects, left = FALSE)
Plazas <- st_join(st_make_valid(Plazas), zonas_ae, join = st_intersects, left = FALSE)
Parques <- st_join(st_make_valid(Parques), zonas_ae, join = st_intersects, left = FALSE)

# House listings, squares and parks over the study area.
grafo_base +
  geom_sf(
    data = Precios1, aes(fill = "Precios1"), alpha = 0.5, color = "blue",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Plazas, color = alpha("green", 1), size = 1) +
  geom_sf(data = Parques, color = alpha("pink", 1), size = 1) +
  ggtitle("Distancias Areas Verdes Casas Independencia") +
  scale_fill_manual(
    values = c(
      Precios1 = "blue",
      Plazas = "green",
      Parques = "pink"
    ),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Casas en venta", "Plazas", "Parques"),
    name = "Leyenda:"
  ) +
  theme_void()
casas_plaza <- st_distance(Precios1, Plazas)
casas_parque <- st_distance(Precios1, Parques)
Precios1$dist_plaza <- apply(casas_plaza, 1, min)
Precios1$dist_parque <- apply(casas_parque, 1, min)
#Creamos la variable UF/m2
Precios1 <- Precios1 %>%
mutate(uf=(precio/33000),
ufm2 = uf/area_total) %>%
dplyr::filter(uf>1000)
ggplot(data=Precios1,aes(x=area_total,y=uf)) +
geom_point()+
geom_smooth(method='lm', se = TRUE)+
labs(x = 'Superficie [m²]',
y = 'UF',
title = 'Valor en función de la superficie')
## `geom_smooth()` using formula 'y ~ x'
#DISTANCIA PLAZAS
ggplot(data=Precios1,aes(x=dist_plaza,y=ufm2)) +
geom_point()+
geom_smooth(method='lm', se = TRUE)+
labs(x = 'Distancia [mts]',
y = 'UF/m²',
title = 'Valor en función de la distancia a plazas')
## `geom_smooth()` using formula 'y ~ x'
#DISTANCIA PARQUES
ggplot(data=Precios1,aes(x=dist_parque,y=ufm2)) +
geom_point()+
geom_smooth(method='lm', se = TRUE)+
labs(x = 'Distancia [mts]',
y = 'UF/m²',
title = 'Valor en función de la distancia a parques')
## `geom_smooth()` using formula 'y ~ x'
# Distances from houses to transport
# NOTE(review): Precios1 was already joined against `ae` when it was built
# from precios_casa; the summary of `casas` further down shows COMUNA.x /
# COMUNA.y columns, so this repeated join looks redundant -- confirm.
Precios1 <- st_join(
  Precios1,
  transmute(ae, COMUNA),
  join = st_intersects,
  left = FALSE
)
Metro <- st_join(
  st_make_valid(Metro),
  transmute(ae, COMUNA),
  join = st_intersects,
  left = FALSE
)
Paraderos <- st_join(
  st_make_valid(Paraderos),
  transmute(ae, COMUNA),
  join = st_intersects,
  left = FALSE
)
Ciclovias <- st_join(
  st_make_valid(Ciclovias),
  transmute(ae, COMUNA),
  join = st_intersects,
  left = FALSE
)

# Map of the houses for sale together with the three transport layers
grafo_base +
  geom_sf(
    data = Precios1, aes(fill = "Precios1"), alpha = 0.5, color = "blue",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Metro, color = alpha("red", 1), size = 1) +
  geom_sf(data = Paraderos, color = alpha("grey", 1), size = 1) +
  geom_sf(data = Ciclovias, color = alpha("cyan", 1), size = 1) +
  ggtitle("Distancias Transportes Casas Independencia") +
  scale_fill_manual(
    values = c(
      Precios1 = "blue",
      Metro = "red",
      Paraderos = "grey",
      Ciclovias = "cyan"
    ),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Casas en venta", "Metro", "Paraderos", "Ciclovias"),
    name = "Leyenda:"
  ) +
  theme_void()

# Nearest metro station / bus stop / cycle lane per house
casas_metro <- st_distance(Precios1, Metro)
Precios1$dist_metro <- apply(X = casas_metro, MARGIN = 1, FUN = min)
casas_paradero <- st_distance(Precios1, Paraderos)
Precios1$dist_paradero <- apply(X = casas_paradero, MARGIN = 1, FUN = min)
casas_ciclovia <- st_distance(Precios1, Ciclovias)
Precios1$dist_ciclovia <- apply(X = casas_ciclovia, MARGIN = 1, FUN = min)

# Distance to metro
ggplot(data = Precios1, aes(x = dist_metro, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a metro"
  )
## `geom_smooth()` using formula 'y ~ x'
# Distance to bus stops
ggplot(data = Precios1, aes(x = dist_paradero, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a paraderos"
  )
## `geom_smooth()` using formula 'y ~ x'
# Distance to cycle lanes
ggplot(data = Precios1, aes(x = dist_ciclovia, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a ciclovias"
  )
## `geom_smooth()` using formula 'y ~ x'
# Distance from houses to health facilities
# NOTE(review): Precios1 was already joined against `ae` when it was built
# from precios_casa, so this repeated join looks redundant -- confirm.
Precios1 <- Precios1 %>%
  st_join(transmute(ae, COMUNA), join = st_intersects, left = FALSE)
Salud <- st_make_valid(Salud) %>%
  st_join(transmute(ae, COMUNA), join = st_intersects, left = FALSE)

# Map of the houses for sale together with the health facilities
grafo_base +
  geom_sf(
    data = Precios1, aes(fill = "Precios1"), alpha = 0.5, color = "blue",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Salud, color = alpha("magenta", 1), size = 1) +
  ggtitle("Distancias Salud Casas Independencia") +
  scale_fill_manual(
    # Fix: the magenta entry was keyed "Metro" (left over from the transport
    # map); the layer drawn in magenta here is Salud.
    values = c(Precios1 = "blue", Salud = "magenta"),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Casas en venta", "Salud"),
    name = "Leyenda:"
  ) +
  theme_void()

# Nearest health facility per house: row minimum of the distance matrix
casas_salud <- st_distance(Precios1, Salud)
Precios1$dist_salud <- apply(casas_salud, 1, min)

# Price per m² against distance to health facilities
ggplot(data = Precios1, aes(x = dist_salud, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a salud"
  )
## `geom_smooth()` using formula 'y ~ x'
#CONCESIÓN SANITARIA (manejo de agua potable y aguas servidas)
#Precios1 <- Precios1 %>%
# st_join(transmute(ae, COMUNA),join = st_intersects, left = FALSE)
#Concesion <- st_make_valid(Concesion) %>%
# st_join(transmute(ae, COMUNA),join = st_intersects, left = FALSE)
#grafo_base +
# geom_sf(data=Precios1, aes(fill="Precios1"), alpha=0.5,color="blue",
# show.legend = "polygon", inherit.aes = F)+
# geom_sf(data = Concesion, color = alpha("yellow", 1), size = 1) +
# ggtitle("Concesión sanitaria Independencia")+
# scale_fill_manual(values = c(Precios1 = "blue",
# Concesion = "yellow"),
# guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
# labels = c("Casas en venta", "Concesion sanitaria"), name = "Leyenda:")+
# theme_void()
#casas_concesion <- st_distance(Precios1, Concesion)
#Precios1$dist_concesion <- apply(casas_concesion, 1, min)
#CONCESION SANITARIA
#ggplot(data=Precios1,aes(x=dist_concesion,y=ufm2)) +
# geom_point()+
# geom_smooth(method='lm', se = TRUE)+
# labs(x = 'Distancia [mts]',
# y = 'UF/m²',
# title = 'Valor en función de la Concesión Sanitaria')
# Education distances for houses (Independencia)
# NOTE(review): Precios1, Edu and Colegios are each joined against `ae`
# earlier in this file as well; repeating the join adds further COMUNA*
# columns (see the summary of `casas` below) -- confirm it is intended.
Precios1 <- st_join(
  Precios1,
  transmute(ae, COMUNA),
  join = st_intersects,
  left = FALSE
)
Edu <- st_join(
  st_make_valid(Edu),
  transmute(ae, COMUNA),
  join = st_intersects,
  left = FALSE
)
Colegios <- st_join(
  st_make_valid(Colegios),
  transmute(ae, COMUNA),
  join = st_intersects,
  left = FALSE
)

# Map of the houses for sale together with both education layers
grafo_base +
  geom_sf(
    data = Precios1, aes(fill = "Precios1"), alpha = 0.5, color = "blue",
    show.legend = "polygon", inherit.aes = FALSE
  ) +
  geom_sf(data = Edu, color = alpha("yellow", 1), size = 1) +
  geom_sf(data = Colegios, color = alpha("black", 1), size = 1) +
  ggtitle("Distancias Educación Casas Independencia") +
  scale_fill_manual(
    values = c(Precios1 = "blue", Edu = "yellow", Colegios = "black"),
    guide = guide_legend(override.aes = list(linetype = "blank", shape = NA)),
    labels = c("Casas en venta", "Educación inicial", "Colegios"),
    name = "Leyenda:"
  ) +
  theme_void()

# Nearest early-education centre / school per house
casas_edu <- st_distance(Precios1, Edu)
Precios1$dist_edu <- apply(X = casas_edu, MARGIN = 1, FUN = min)
casas_colegios <- st_distance(Precios1, Colegios)
Precios1$dist_colegios <- apply(X = casas_colegios, MARGIN = 1, FUN = min)

# Distance to early education
ggplot(data = Precios1, aes(x = dist_edu, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a Educación Inicial"
  )
## `geom_smooth()` using formula 'y ~ x'
# Distance to schools
ggplot(data = Precios1, aes(x = dist_colegios, y = ufm2)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Distancia [mts]",
    y = "UF/m²",
    title = "Valor en función de la distancia a colegios"
  )
## `geom_smooth()` using formula 'y ~ x'
# QUESTION 3: Fit an OLS econometric model with fixed effects and a SAR/SEM model, then compare them to determine the most relevant variables
# Working copies used by the exploratory analysis and the regressions
# below: houses come from Precios1, apartments from Precios.
casas <- Precios1
deptos <- Precios
# manejo datos
library(dplyr)
library(stringr)
library(mgsub)
#Paquetes para analisis de regresiones
library(vtable)
## Loading required package: kableExtra
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:spatstat.core':
##
## bc
## The following object is masked from 'package:spatstat.geom':
##
## ellipse
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
library(nortest)
# visualizacion
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## The following object is masked from 'package:spatstat.geom':
##
## rescale
## The following object is masked from 'package:viridis':
##
## viridis_pal
# Column-by-column descriptive summary of the houses data set
summary(casas)
## precio bedrooms bathrooms area_total
## Min. : 59000000 Min. : 1.000 Min. :1.00 Min. : 80.0
## 1st Qu.:118693654 1st Qu.: 3.000 1st Qu.:1.00 1st Qu.:100.0
## Median :155217387 Median : 4.000 Median :3.00 Median :173.0
## Mean :184496505 Mean : 4.049 Mean :2.89 Mean :204.2
## 3rd Qu.:244169802 3rd Qu.: 4.000 3rd Qu.:4.00 3rd Qu.:250.0
## Max. :508687089 Max. :10.000 Max. :7.00 Max. :550.0
## area_util dir.x ID uf
## Min. : 63.0 Length:182 Min. : 1.00 Min. : 1788
## 1st Qu.: 90.0 Class :character 1st Qu.: 60.25 1st Qu.: 3597
## Median :117.0 Mode :character Median :119.50 Median : 4704
## Mean :142.2 Mean :118.68 Mean : 5591
## 3rd Qu.:180.0 3rd Qu.:178.50 3rd Qu.: 7399
## Max. :300.0 Max. :234.00 Max. :15415
## ufm2 COMUNA.x dist_plaza dist_parque
## Min. :14.13 Length:182 Min. : 0.00 Min. : 122.4
## 1st Qu.:23.23 Class :character 1st Qu.: 0.00 1st Qu.: 859.8
## Median :30.26 Mode :character Median : 93.81 Median :1325.0
## Mean :29.15 Mean :155.97 Mean :1271.0
## 3rd Qu.:35.97 3rd Qu.:316.00 3rd Qu.:1702.9
## Max. :42.09 Max. :541.30 Max. :2310.3
## COMUNA.y dist_metro dist_paradero dist_ciclovia
## Length:182 Min. : 10.07 Min. : 19.08 Min. : 180.2
## Class :character 1st Qu.: 20.20 1st Qu.: 53.98 1st Qu.: 748.9
## Mode :character Median : 450.77 Median : 67.36 Median :1491.2
## Mean : 587.60 Mean : 99.07 Mean :1352.3
## 3rd Qu.:1039.80 3rd Qu.:112.59 3rd Qu.:1895.8
## Max. :1887.12 Max. :445.27 Max. :2237.2
## COMUNA.x.1 dist_salud COMUNA.y.1 geometry
## Length:182 Min. : 110.4 Length:182 POINT :182
## Class :character 1st Qu.: 355.1 Class :character epsg:4326 : 0
## Mode :character Median : 579.3 Mode :character +proj=long...: 0
## Mean : 618.2
## 3rd Qu.: 941.8
## Max. :1021.7
## dist_edu dist_colegios
## Min. : 293.4 Min. : 76.02
## 1st Qu.: 360.9 1st Qu.:176.15
## Median : 480.7 Median :384.55
## Mean : 492.0 Mean :309.76
## 3rd Qu.: 518.1 3rd Qu.:420.53
## Max. :1065.1 Max. :534.99
# Column-by-column descriptive summary of the apartments data set
summary(deptos)
## precio bedrooms bathrooms area_total
## Min. : 52000000 Min. :1.000 Min. :1.0 Min. :27.00
## 1st Qu.: 59685952 1st Qu.:1.000 1st Qu.:1.0 1st Qu.:31.00
## Median : 67824945 Median :2.000 Median :2.0 Median :40.00
## Mean : 76429987 Mean :1.978 Mean :1.6 Mean :41.36
## 3rd Qu.: 84781181 3rd Qu.:3.000 3rd Qu.:2.0 3rd Qu.:47.00
## Max. :142432385 Max. :4.000 Max. :3.0 Max. :75.00
## area_util dir.x ID uf
## Min. :27.00 Length:135 Min. : 1.00 Min. :1576
## 1st Qu.:30.00 Class :character 1st Qu.: 37.50 1st Qu.:1809
## Median :38.00 Mode :character Median : 74.00 Median :2055
## Mean :39.44 Mean : 73.76 Mean :2316
## 3rd Qu.:45.04 3rd Qu.:110.50 3rd Qu.:2569
## Max. :68.00 Max. :144.00 Max. :4316
## ufm2 COMUNA.x dist_plaza dist_parque
## Min. :41.47 Length:135 Min. : 0.00 Min. : 768.2
## 1st Qu.:52.10 Class :character 1st Qu.: 30.83 1st Qu.:1017.2
## Median :55.92 Mode :character Median :132.28 Median :1412.2
## Mean :56.17 Mean :168.26 Mean :1456.5
## 3rd Qu.:58.41 3rd Qu.:268.63 3rd Qu.:1660.6
## Max. :90.36 Max. :461.24 Max. :2623.4
## COMUNA.y dist_metro dist_paradero dist_ciclovia
## Length:135 Min. : 20.2 Min. : 4.77 Min. : 1.102
## Class :character 1st Qu.: 328.2 1st Qu.: 51.33 1st Qu.: 569.304
## Mode :character Median : 577.2 Median : 66.72 Median :1166.363
## Mean : 529.1 Mean :106.95 Mean :1249.998
## 3rd Qu.: 745.0 3rd Qu.:123.16 3rd Qu.:1895.786
## Max. :1350.5 Max. :319.12 Max. :2495.100
## COMUNA.x.1 dist_salud COMUNA.y.1 dist_concesion
## Length:135 Min. : 75.13 Length:135 Min. : 75.13
## Class :character 1st Qu.: 359.09 Class :character 1st Qu.: 359.09
## Mode :character Median : 668.78 Mode :character Median : 668.78
## Mean : 711.29 Mean : 711.29
## 3rd Qu.:1018.67 3rd Qu.:1018.67
## Max. :1613.06 Max. :1613.06
## COMUNA geometry dist_edu dist_colegios
## Length:135 POINT :135 Min. : 68.94 Min. : 49.95
## Class :character epsg:4326 : 0 1st Qu.: 434.69 1st Qu.:135.37
## Mode :character +proj=long...: 0 Median : 614.78 Median :356.01
## Mean : 605.91 Mean :301.70
## 3rd Qu.: 725.50 3rd Qu.:389.52
## Max. :1250.89 Max. :543.97
En primer lugar, se realizará un análisis exploratorio de todas las variables dentro de las bases de datos “casas” y “deptos”.
Análisis Exploratorio Casas: 1. PRECIO
# Histogram of one column of `datos`, drawn with the blue palette used for
# houses throughout this exploratory section.
#   datos        data frame (or sf object) that holds the column
#   variable     name of the column to plot, given as a string
#   bins         number of histogram bins
#   eje_x        x-axis label
#   titulo       plot title
#   subtitulo    optional subtitle; NULL (default) draws none
#   eje_numerico when TRUE the x axis is labelled with
#                number_format(scale = 1), as the price/room/area plots do
# Returns the ggplot object, which is printed when called at top level.
hist_casas <- function(datos, variable, bins, eje_x, titulo,
                       subtitulo = NULL, eje_numerico = FALSE) {
  grafico <- ggplot(data = datos, aes(x = .data[[variable]])) +
    geom_histogram(color = "darkblue", fill = "lightblue", bins = bins) +
    labs(x = eje_x, y = "Frecuencia", title = titulo, subtitle = subtitulo) +
    theme_classic()
  if (eje_numerico) {
    grafico <- grafico +
      scale_x_continuous(labels = number_format(scale = 1))
  }
  grafico
}

# 1. Price: raw distribution, then again after dropping listings above
#    350,000,000
hist_casas(
  casas, "precio", bins = 30, eje_x = "Precio",
  titulo = "Distribución de Precios de Casas en Independencia",
  eje_numerico = TRUE
)
casas <- casas %>%
  dplyr::filter(precio <= 350000000)
hist_casas(
  casas, "precio", bins = 30, eje_x = "Precio",
  titulo = "Distribución de Precios de Casas en Independencia",
  subtitulo = "Sin outliers", eje_numerico = TRUE
)

# Bedrooms: raw distribution, then without houses above 8 bedrooms
hist_casas(
  casas, "bedrooms", bins = 10, eje_x = "Cantidad de Dormitorios",
  titulo = "Cantidad de Dormitorios en Casas en Independencia",
  eje_numerico = TRUE
)
casas <- casas %>%
  dplyr::filter(bedrooms <= 8)
hist_casas(
  casas, "bedrooms", bins = 10, eje_x = "Cantidad de Dormitorios",
  titulo = "Cantidad de Dormitorios en Casas en Independencia",
  subtitulo = "Sin outliers", eje_numerico = TRUE
)

# Bathrooms: raw distribution, then without houses above 5 bathrooms
hist_casas(
  casas, "bathrooms", bins = 10, eje_x = "Cantidad de Baños",
  titulo = "Cantidad de Baños en Casas en Independencia",
  eje_numerico = TRUE
)
casas <- casas %>%
  dplyr::filter(bathrooms <= 5)
hist_casas(
  casas, "bathrooms", bins = 10, eje_x = "Cantidad de Baños",
  titulo = "Cantidad de Baños en Casas en Independencia",
  subtitulo = "Sin outliers", eje_numerico = TRUE
)

# Total and usable area
hist_casas(
  casas, "area_total", bins = 20, eje_x = "Área Total",
  titulo = "Área Total de las Casas en Independencia",
  eje_numerico = TRUE
)
hist_casas(
  casas, "area_util", bins = 30, eje_x = "Área Útil",
  titulo = "Área Útil de las Casas en Independencia",
  eje_numerico = TRUE
)

# Distance to metro
hist_casas(
  casas, "dist_metro", bins = 30, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas al Metro en Independencia"
)

# Distance to bus stops: raw distribution, then without houses farther
# than 250 from a stop. The second plot now carries the same
# "Sin outliers" subtitle as every other post-filter histogram.
hist_casas(
  casas, "dist_paradero", bins = 30, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas a Paraderos en Independencia"
)
casas <- casas %>%
  dplyr::filter(dist_paradero <= 250)
hist_casas(
  casas, "dist_paradero", bins = 30, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas a Paraderos en Independencia",
  subtitulo = "Sin outliers"
)

# Remaining distance variables
hist_casas(
  casas, "dist_plaza", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas a Plazas en Independencia"
)
hist_casas(
  casas, "dist_parque", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas a Parques en Independencia"
)
hist_casas(
  casas, "dist_salud", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas a Centros de Salud en Independencia"
)
hist_casas(
  casas, "dist_edu", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas a Jardines Infantiles en Independencia"
)
hist_casas(
  casas, "dist_colegios", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de las Casas a Colegios en Independencia"
)
Análisis Exploratorio Departamentos: 1. PRECIO
# Raw price distribution of the apartments, before any outlier filter
ggplot(data = deptos, mapping = aes(x = precio)) +
  geom_histogram(bins = 30, fill = "pink", color = "red") +
  labs(
    title = "Distribución Precio de Departamentos en Independencia",
    x = "Precio de la propiedad",
    y = "Frecuencia"
  ) +
  theme_classic() +
  scale_x_continuous(labels = number_format(scale = 1))
Eliminaremos outliers
# Histogram of one column of `datos`, drawn with the red/pink palette used
# for apartments throughout this exploratory section.
#   datos        data frame (or sf object) that holds the column
#   variable     name of the column to plot, given as a string
#   bins         number of histogram bins
#   eje_x        x-axis label
#   titulo       plot title
#   subtitulo    optional subtitle; NULL (default) draws none
#   eje_numerico when TRUE the x axis is labelled with
#                number_format(scale = 1)
# Returns the ggplot object, which is printed when called at top level.
hist_deptos <- function(datos, variable, bins, eje_x, titulo,
                        subtitulo = NULL, eje_numerico = FALSE) {
  grafico <- ggplot(data = datos, aes(x = .data[[variable]])) +
    geom_histogram(color = "red", fill = "pink", bins = bins) +
    labs(x = eje_x, y = "Frecuencia", title = titulo, subtitle = subtitulo) +
    theme_classic()
  if (eje_numerico) {
    grafico <- grafico +
      scale_x_continuous(labels = number_format(scale = 1))
  }
  grafico
}

# Price after dropping listings at or above 125,000,000
deptos <- deptos %>%
  dplyr::filter(precio < 125000000)
hist_deptos(
  deptos, "precio", bins = 30, eje_x = "Precio de la propiedad",
  titulo = "Distribución Precio de Departamentos en Independencia",
  subtitulo = "Sin outliers", eje_numerico = TRUE
)

# Dwelling attributes
hist_deptos(
  deptos, "bedrooms", bins = 10, eje_x = "Cantidad de dormitorios",
  titulo = "Cantidad de Dormitorios en Departamentos en Independencia"
)
hist_deptos(
  deptos, "bathrooms", bins = 10, eje_x = "Cantidad de baños",
  titulo = "Cantidad de Baños en Departamentos en Independencia"
)
hist_deptos(
  deptos, "area_total", bins = 30, eje_x = "Area total",
  titulo = "Área Total de los Departamentos en Independencia"
)
hist_deptos(
  deptos, "area_util", bins = 30, eje_x = "Area util",
  titulo = "Área Útil de los Departamentos en Independencia"
)

# Distance variables
hist_deptos(
  deptos, "dist_metro", bins = 30, eje_x = "Distancia (mts)",
  titulo = "Distancia de los Departamentos al Metro en Independencia"
)
hist_deptos(
  deptos, "dist_paradero", bins = 30, eje_x = "Distancia (mts)",
  titulo = "Distancia de los Departamentos a Paraderos en Independencia"
)
hist_deptos(
  deptos, "dist_plaza", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de los Departamentos a Plazas en Independencia"
)
hist_deptos(
  deptos, "dist_parque", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de los Departamentos a Parques en Independencia"
)
hist_deptos(
  deptos, "dist_salud", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de los Departamentos a Centros de Salud en Independencia"
)
hist_deptos(
  deptos, "dist_edu", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de los Departamentos a Jardines Infantiles en Independencia"
)
hist_deptos(
  deptos, "dist_colegios", bins = 20, eje_x = "Distancia (mts)",
  titulo = "Distancia de los Departamentos a Colegios en Independencia"
)
Comenzamos con el Modelo Econométrico OLS
CASAS
# Full OLS specification for houses: price regressed on the dwelling
# attributes and on every nearest-amenity distance.
modelo1_casas <- lm(
  precio ~ bedrooms + bathrooms + area_total + area_util +
    dist_metro + dist_paradero + dist_plaza + dist_parque +
    dist_salud + dist_edu + dist_colegios,
  data = casas
)
summary(modelo1_casas)
##
## Call:
## lm(formula = precio ~ bedrooms + bathrooms + area_total + area_util +
## dist_metro + dist_paradero + dist_plaza + dist_parque + dist_salud +
## dist_edu + dist_colegios, data = casas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58645672 -20919660 -8625688 23531704 69516517
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14684041 31658861 0.464 0.64350
## bedrooms 9873085 4298615 2.297 0.02311 *
## bathrooms 20730293 3434463 6.036 1.34e-08 ***
## area_total 678063 82302 8.239 1.12e-13 ***
## area_util -265238 129596 -2.047 0.04256 *
## dist_metro -10748 8252 -1.302 0.19490
## dist_paradero -105989 61034 -1.737 0.08467 .
## dist_plaza -116543 27117 -4.298 3.21e-05 ***
## dist_parque 27601 11014 2.506 0.01335 *
## dist_salud -40180 12730 -3.156 0.00196 **
## dist_edu 18829 23226 0.811 0.41894
## dist_colegios -13281 26887 -0.494 0.62210
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34430000 on 140 degrees of freedom
## Multiple R-squared: 0.8379, Adjusted R-squared: 0.8252
## F-statistic: 65.79 on 11 and 140 DF, p-value: < 2.2e-16
library(corrplot)
## corrplot 0.92 loaded
# Correlation matrix of the response and every candidate regressor for
# houses. The geometry column is removed explicitly with st_drop_geometry()
# instead of by position (the code used to drop "column 13"), so the
# selection keeps working if the list of variables changes.
estudio_corr <- as.data.frame(sf::st_drop_geometry(casas))[
  ,
  c(
    "precio", "bedrooms", "bathrooms", "area_total", "area_util",
    "dist_metro", "dist_paradero", "dist_plaza", "dist_parque",
    "dist_salud", "dist_edu", "dist_colegios"
  )
]
# cor() needs numeric input; coerce the same three columns as before
estudio_corr <- transform(
  estudio_corr,
  bedrooms = as.numeric(bedrooms),
  bathrooms = as.numeric(bathrooms),
  area_total = as.numeric(area_total)
)
corrp <- round(cor(estudio_corr), digits = 2)
corrplot(corrp, method = "color", tl.cex = 0.7, tl.srt = 60)
En base a la primera regresión realizada y el análisis de las correlaciones entre las variables, se tomarán en consideración para la segunda regresión aquellas variables que muestran ser estadísticamente significativas, las cuales corresponden a:
# Reduced OLS specification for houses with the regressors retained after
# the first regression and the correlation analysis.
modelo2_casas <- lm(
  precio ~ bedrooms + area_total +
    dist_plaza + dist_parque + dist_salud,
  data = casas
)
summary(modelo2_casas)
##
## Call:
## lm(formula = precio ~ bedrooms + area_total + dist_plaza + dist_parque +
## dist_salud, data = casas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63012846 -25400230 -3938547 23385779 98405802
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -40090342 15145908 -2.647 0.00901 **
## bedrooms 28047890 3182549 8.813 3.31e-15 ***
## area_total 557922 33524 16.643 < 2e-16 ***
## dist_plaza -187060 25867 -7.232 2.48e-11 ***
## dist_parque 52068 7859 6.625 6.22e-10 ***
## dist_salud -45849 11082 -4.137 5.91e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38330000 on 146 degrees of freedom
## Multiple R-squared: 0.7905, Adjusted R-squared: 0.7833
## F-statistic: 110.2 on 5 and 146 DF, p-value: < 2.2e-16
DEPARTAMENTOS
# Full OLS specification for apartments: price regressed on the dwelling
# attributes and on every nearest-amenity distance.
modelo1_deptos <- lm(
  precio ~ bedrooms + bathrooms + area_total + area_util +
    dist_metro + dist_paradero + dist_plaza + dist_parque +
    dist_salud + dist_edu + dist_colegios,
  data = deptos
)
summary(modelo1_deptos)
##
## Call:
## lm(formula = precio ~ bedrooms + bathrooms + area_total + area_util +
## dist_metro + dist_paradero + dist_plaza + dist_parque + dist_salud +
## dist_edu + dist_colegios, data = deptos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12923017 -2748535 207376 3598794 12440377
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25580106 6754788 3.787 0.000242 ***
## bedrooms 8194646 1322602 6.196 8.93e-09 ***
## bathrooms 5076498 1626272 3.122 0.002267 **
## area_total 1782693 346870 5.139 1.12e-06 ***
## area_util -946815 429135 -2.206 0.029314 *
## dist_metro -17505 3581 -4.888 3.27e-06 ***
## dist_paradero -6392 9834 -0.650 0.516938
## dist_plaza -45396 9708 -4.676 7.91e-06 ***
## dist_parque 6568 1767 3.716 0.000312 ***
## dist_salud -3840 3953 -0.971 0.333410
## dist_edu 3815 3960 0.964 0.337259
## dist_colegios -6910 6875 -1.005 0.316936
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6558000 on 117 degrees of freedom
## Multiple R-squared: 0.884, Adjusted R-squared: 0.8731
## F-statistic: 81.03 on 11 and 117 DF, p-value: < 2.2e-16
# Correlation matrix of the response and every candidate regressor for
# apartments. The geometry column is removed explicitly with
# st_drop_geometry() instead of by position (the code used to drop
# "column 13"), so the selection keeps working if the variable list changes.
estudio_corr_dept <- as.data.frame(sf::st_drop_geometry(deptos))[
  ,
  c(
    "precio", "bedrooms", "bathrooms", "area_total", "area_util",
    "dist_metro", "dist_paradero", "dist_plaza", "dist_parque",
    "dist_salud", "dist_edu", "dist_colegios"
  )
]
# cor() needs numeric input; coerce the same three columns as before
estudio_corr_dept <- transform(
  estudio_corr_dept,
  bedrooms = as.numeric(bedrooms),
  bathrooms = as.numeric(bathrooms),
  area_total = as.numeric(area_total)
)
corrp_d <- round(cor(estudio_corr_dept), digits = 2)
corrplot(corrp_d, method = "color", tl.cex = 0.7, tl.srt = 60)
En base a la primera regresión realizada con los datos sobre departamentos y el análisis de las correlaciones entre las variables, se tomarán en consideración para la segunda regresión aquellas variables que muestran ser estadísticamente significativas, las cuales corresponden a:
No obstante, algunas de las variables mencionadas poseen una correlación elevada con otra variable mencionada, por lo que sólo se debiese considerar una de ellas. Un ejemplo de lo anterior es el caso de las variables “bathrooms” y “bedrooms”, las cuales poseen una correlación cercana a 0.8, por lo que se podría extrapolar, tanto numérica como intuitivamente, que estas variables modelan o poseen efectos similares. De esta forma, las variables a utilizar para la segunda regresión de los precios de departamentos son:
# Reduced OLS specification for apartments with the regressors retained
# after the first regression and the correlation analysis.
modelo2_deptos <- lm(
  precio ~ bedrooms + area_total +
    dist_metro + dist_plaza + dist_parque,
  data = deptos
)
summary(modelo2_deptos)
##
## Call:
## lm(formula = precio ~ bedrooms + area_total + dist_metro + dist_plaza +
## dist_parque, data = deptos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14356987 -5502007 -231879 6278056 13579446
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14703316 3528958 4.166 5.78e-05 ***
## bedrooms 8716017 1142883 7.626 5.67e-12 ***
## area_total 1171302 89489 13.089 < 2e-16 ***
## dist_metro -13863 1950 -7.109 8.38e-11 ***
## dist_plaza -34919 5544 -6.298 4.86e-09 ***
## dist_parque 5850 1713 3.416 0.000863 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6876000 on 123 degrees of freedom
## Multiple R-squared: 0.8659, Adjusted R-squared: 0.8605
## F-statistic: 158.9 on 5 and 123 DF, p-value: < 2.2e-16
Cargamos librerías
library(readr)
library(dplyr)
library(tidyverse)
library(sf)
library(sp)
library(ggplot2)
#library(tmap)
#library(tmaptools)
library(car)
library(lmtest)
#library(qpcR)
library(mblm)
library(RColorBrewer)
library(spdep)
library(spgwr)
library(raster)
library(leaflet)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(robustHD)
## Loading required package: perry
## Loading required package: parallel
## Loading required package: robustbase
##
## Attaching package: 'robustHD'
## The following object is masked from 'package:rgeos':
##
## getScale
Comenzando con SAR y SEM: CASAS
# Number of nearest neighbours used when building the spatial weights lists.
num_vecinos <- 4

# st_jitter() perturbs the coordinates with random noise. Fix the RNG seed so
# the neighbour structure, and every Moran / Lagrange-multiplier statistic
# computed from it, is reproducible from one run to the next.
set.seed(123)

# House listings as an sp object with jittered point locations.
# NOTE(review): the jitter is presumably there to separate listings that share
# identical coordinates before the k-nearest-neighbour search -- confirm.
Precios_esp_casas <- casas %>%
  st_as_sf() %>%
  st_jitter() %>%
  as_Spatial()
# Map of the house point locations drawn as semi-transparent blue points.
ggplot() +
  geom_sf() +
  geom_sf(
    data = st_as_sf(Precios_esp_casas),
    colour = alpha("blue", 0.3)
  ) +
  labs(title = 'Modelo SAR para Casas en Independencia')
# Spatial weights for houses: k nearest neighbours of every point, converted
# to a neighbour list and then to row-standardised ("W") weights.
# `FALSE` is spelled out because the shorthand `F` is an ordinary variable
# that can be reassigned.
# NOTE(review): per the spdep documentation, knearneigh() takes the long/lat
# setting from the object itself when given a SpatialPoints input -- confirm
# that the CRS of Precios_esp_casas yields the intended distance metric.
nb_casas <- Precios_esp_casas %>%
  knearneigh(k = num_vecinos, longlat = FALSE) %>%
  knn2nb() %>%
  nb2listw(style = "W")

# Global Moran's I test of spatial autocorrelation in the house prices.
moran.test(x = Precios_esp_casas$precio, listw = nb_casas)
##
## Moran I test under randomisation
##
## data: Precios_esp_casas$precio
## weights: nb_casas
##
## Moran I statistic standard deviate = 13.628, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.736416695 -0.006622517 0.002972870
# Keep the residuals of the first house OLS model next to the observations.
casas$OLS_residuos <- modelo1_casas[["residuals"]]

# Moran's I on the stored residuals, treating them as an ordinary variable.
moran.test(
  x = casas$OLS_residuos,
  listw = nb_casas
)
##
## Moran I test under randomisation
##
## data: casas$OLS_residuos
## weights: nb_casas
##
## Moran I statistic standard deviate = 11.718, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.633143062 -0.006622517 0.002981002
# Moran's I test designed for regression residuals, applied to the first
# house model; one-sided test for positive spatial autocorrelation.
lm.morantest(
  model = modelo1_casas,
  listw = nb_casas,
  alternative = "greater"
)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = precio ~ bedrooms + bathrooms + area_total +
## area_util + dist_metro + dist_paradero + dist_plaza + dist_parque +
## dist_salud + dist_edu + dist_colegios, data = casas)
## weights: nb_casas
##
## Moran I statistic standard deviate = 14.931, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.633143062 -0.074499248 0.002246231
I de Moran respecto al precio: 0.736 (una pendiente muy alta). Valor p (< 2.2e-16): se rechaza la ausencia de autocorrelación espacial, es decir, los precios están autocorrelacionados entre sí. Los precios están muy concentrados en ciertos sectores.
Residuos: si el valor p es pequeño, nos dice que hay información contenida en los errores que es común entre los 4 vecinos, es decir, hay cosas que el modelo no está midiendo. Así como los precios tienen elementos en común entre vecinos, los errores también.
Antes los vecinos se definían por aristas; ahora las observaciones son puntos, por lo que se consideran los vecinos más cercanos: para cada observación de la matriz de precios se buscan los 4 vecinos más cercanos.
Comenzando con SAR y SEM: DEPARTAMENTOS
# st_jitter() perturbs the coordinates with random noise. Fix the RNG seed so
# the neighbour structure, and every Moran / Lagrange-multiplier statistic
# computed from it, is reproducible from one run to the next.
set.seed(123)

# Apartment listings as an sp object with jittered point locations.
# NOTE(review): the jitter is presumably there to separate listings that share
# identical coordinates (e.g. units in the same building) -- confirm.
Precios_esp_deptos <- deptos %>%
  st_as_sf() %>%
  st_jitter() %>%
  as_Spatial()
# Map of the apartment point locations drawn as semi-transparent red points.
ggplot() +
  geom_sf() +
  geom_sf(
    data = st_as_sf(Precios_esp_deptos),
    colour = alpha("red", 0.3)
  ) +
  labs(title = 'Modelo SAR para Departamentos en Independencia')
# Spatial weights for apartments: k nearest neighbours of every point,
# converted to a neighbour list and then to row-standardised ("W") weights.
# `FALSE` is spelled out because the shorthand `F` is an ordinary variable
# that can be reassigned.
# NOTE(review): per the spdep documentation, knearneigh() takes the long/lat
# setting from the object itself when given a SpatialPoints input -- confirm
# that the CRS of Precios_esp_deptos yields the intended distance metric.
nb_deptos <- Precios_esp_deptos %>%
  knearneigh(k = num_vecinos, longlat = FALSE) %>%
  knn2nb() %>%
  nb2listw(style = "W")

# Global Moran's I test of spatial autocorrelation in the apartment prices.
moran.test(x = Precios_esp_deptos$precio, listw = nb_deptos)
##
## Moran I test under randomisation
##
## data: Precios_esp_deptos$precio
## weights: nb_deptos
##
## Moran I statistic standard deviate = 9.3518, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.525928719 -0.007812500 0.003257398
# Keep the residuals of the first apartment OLS model next to the observations.
deptos$OLS_residuos <- modelo1_deptos[["residuals"]]

# Moran's I on the stored residuals, treating them as an ordinary variable.
moran.test(
  x = deptos$OLS_residuos,
  listw = nb_deptos
)
##
## Moran I test under randomisation
##
## data: deptos$OLS_residuos
## weights: nb_deptos
##
## Moran I statistic standard deviate = 5.8169, p-value = 2.997e-09
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.32453845 -0.00781250 0.00326444
# Moran's I test designed for regression residuals, applied to the first
# apartment model; one-sided test for positive spatial autocorrelation.
lm.morantest(
  model = modelo1_deptos,
  listw = nb_deptos,
  alternative = "greater"
)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = precio ~ bedrooms + bathrooms + area_total +
## area_util + dist_metro + dist_paradero + dist_plaza + dist_parque +
## dist_salud + dist_edu + dist_colegios, data = deptos)
## weights: nb_deptos
##
## Moran I statistic standard deviate = 8.3542, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.324538449 -0.079428558 0.002338197
CASAS
Para calcular la I de Moran de los residuos, primero se deben obtener y estudiar los residuos de la regresión. Para ello, se analizará si los errores siguen un comportamiento normal:
# Index plots of the raw OLS residuals for both house models.
plot(x = modelo1_casas$residuals)
plot(x = modelo2_casas$residuals)

# Histogram of the residuals of the first house model (default binning).
ggplot() +
  geom_histogram(aes(x = modelo1_casas$residuals)) +
  labs(x = "OLS residuals")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of the residuals of the second house model (default binning).
ggplot() +
  geom_histogram(aes(x = modelo2_casas$residuals)) +
  labs(x = "OLS residuals")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Standard lm diagnostic plots for both house models.
plot(x = modelo1_casas)
plot(x = modelo2_casas)

# Lagrange multiplier diagnostics for spatial dependence on the first house
# model; test = "all" requests LMerr, LMlag, their robust versions and SARMA.
T_lagrange_casas <- lm.LMtests(
  model = modelo1_casas,
  listw = nb_casas,
  test = "all"
)
summary(T_lagrange_casas)
## Lagrange multiplier diagnostics for spatial dependence
## data:
## model: lm(formula = precio ~ bedrooms + bathrooms + area_total +
## area_util + dist_metro + dist_paradero + dist_plaza + dist_parque +
## dist_salud + dist_edu + dist_colegios, data = casas)
## weights: nb_casas
##
## statistic parameter p.value
## LMerr 130.9075 1 < 2.2e-16 ***
## LMlag 61.8225 1 3.775e-15 ***
## RLMerr 73.6354 1 < 2.2e-16 ***
## RLMlag 4.5504 1 0.03291 *
## SARMA 135.4579 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Finalmente, se calcularán los Errores Cuadráticos Medios de los modelos:
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:perry':
##
## mape
## The following object is masked from 'package:spatstat.core':
##
## auc
# In-sample mean squared error of the first house model: observed prices
# against the model's predictions for the same rows.
ECM_mod1_casas <- mse(
  actual = casas$precio,
  predicted = predict(object = modelo1_casas, newdata = casas)
)
ECM_mod1_casas
## [1] 1.091732e+15
# In-sample mean squared error of the second (reduced) house model.
ECM_mod2_casas <- mse(
  actual = casas$precio,
  predicted = predict(object = modelo2_casas, newdata = casas)
)
ECM_mod2_casas
## [1] 1.410897e+15
DEPARTAMENTOS
Para calcular la I de Moran de los residuos, primero se deben obtener y estudiar los residuos de la regresión. Para ello, se analizará si los errores siguen un comportamiento normal:
# Index plots of the raw OLS residuals for both apartment models.
plot(x = modelo1_deptos$residuals)
plot(x = modelo2_deptos$residuals)

# Histogram of the residuals of the first apartment model (default binning).
ggplot() +
  geom_histogram(aes(x = modelo1_deptos$residuals)) +
  labs(x = "OLS residuals")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of the residuals of the second apartment model (default binning).
ggplot() +
  geom_histogram(aes(x = modelo2_deptos$residuals)) +
  labs(x = "OLS residuals")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Standard lm diagnostic plots for both apartment models.
plot(x = modelo1_deptos)
plot(x = modelo2_deptos)

# Lagrange multiplier diagnostics for spatial dependence on the first
# apartment model; test = "all" requests LMerr, LMlag, their robust versions
# and SARMA.
T_lagrange_deptos <- lm.LMtests(
  model = modelo1_deptos,
  listw = nb_deptos,
  test = "all"
)
summary(T_lagrange_deptos)
## Lagrange multiplier diagnostics for spatial dependence
## data:
## model: lm(formula = precio ~ bedrooms + bathrooms + area_total +
## area_util + dist_metro + dist_paradero + dist_plaza + dist_parque +
## dist_salud + dist_edu + dist_colegios, data = deptos)
## weights: nb_deptos
##
## statistic parameter p.value
## LMerr 31.02154 1 2.552e-08 ***
## LMlag 0.13249 1 0.71587
## RLMerr 37.40642 1 9.591e-10 ***
## RLMlag 6.51737 1 0.01068 *
## SARMA 37.53890 2 7.056e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Finalmente, se calcularán los Errores Cuadráticos Medios de los modelos:
# In-sample mean squared error of the first apartment model: observed prices
# against the model's predictions for the same rows.
ECM_mod1_deptos <- mse(
  actual = deptos$precio,
  predicted = predict(object = modelo1_deptos, newdata = deptos)
)
ECM_mod1_deptos
## [1] 3.900631e+13
# In-sample mean squared error of the second (reduced) apartment model.
ECM_mod2_deptos <- mse(
  actual = deptos$precio,
  predicted = predict(object = modelo2_deptos, newdata = deptos)
)
ECM_mod2_deptos
## [1] 4.507409e+13
# PREGUNTA 4: Concluir sobre lo estudiado a lo largo del semestre e indicar cuáles son los elementos más relevantes en el mercado de estudio.

# XII. Elementos más relevantes en el mercado de estudio