library(tidyverse)
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## -- Attaching packages ---------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.0 v purrr 0.3.2
## v tibble 2.1.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
Cargo los datos de Airbnb (la flechita de asignación `<-` se escribe con Alt + tecla menos).
# Download the Airbnb dataset as a plain data frame; text columns stay as
# character vectors instead of being converted to factors.
airbnb <- read.csv(
  file = "https://query.data.world/s/55amvafrknrgkeyeiu54yb2c6u6brc",
  stringsAsFactors = FALSE
)
Cargo las comunas de Buenos Aires.
# Read the comunas polygons from the remote GeoJSON into an sf object.
comunas <- st_read(
  dsn = "https://bitsandbricks.github.io/data/CABA_comunas.geojson"
)
## Reading layer `CABA_comunas' from data source `https://bitsandbricks.github.io/data/CABA_comunas.geojson' using driver `GeoJSON'
## Simple feature collection with 15 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -58.53152 ymin: -34.70529 xmax: -58.33514 ymax: -34.52754
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Ahora dibujo los dos juntos.
# Overlay the raw listing coordinates (still plain longitude/latitude columns)
# on top of the comunas polygons.
ggplot() +
  geom_sf(data = comunas) +
  geom_point(
    data = airbnb,
    mapping = aes(x = longitude, y = latitude),
    colour = "orange",
    alpha = 0.3
  )
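Ahora limpio la base de datos quitando las filas que no tienen coordenadas cargadas, porque si no dará error. Eso lo logro con `!is.na(...)`: `filter()` trabaja sobre filas y `select()` sobre columnas. `st_as_sf()` es parte del paquete sf, cuyas funciones siempre empiezan con `st_`. El argumento `crs` indica el sistema de coordenadas de referencia; en la mayoría de los casos será `crs = 4326`, que corresponde a longitud/latitud sobre el datum WGS84 (no es una proyección Mercator). Los datos de las comunas de Buenos Aires pueden venir en otro sistema de coordenadas local, cuyo origen está en la iglesia de San José de Flores, aunque el archivo cargado aquí ya viene en 4326. Al crear el objeto espacial hay que indicar el `crs` en el que están expresadas las coordenadas. Convierto en espacial mi dataset.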
# Keep only the rows where both coordinates are present, then turn the table
# into an sf object with point geometries built from longitude/latitude,
# declared as EPSG:4326.
airbnb <- st_as_sf(
  filter(airbnb, !(is.na(latitude) | is.na(longitude))),
  coords = c("longitude", "latitude"),
  crs = 4326
)
# Same map as before, but both layers are now sf objects drawn with geom_sf().
ggplot() +
  geom_sf(data = comunas) +
  geom_sf(
    data = airbnb,
    colour = "orange",
    alpha = 0.3
  )
Para hacer el spatial join, armo un data frame nuevo, `airbnb_con_comunas`.
# Spatial left join: every listing keeps its row and receives the attributes
# of the comuna polygon it intersects. The join predicate and `left` are the
# st_join() defaults, spelled out here for clarity.
airbnb_con_comunas <- st_join(
  x = airbnb,
  y = comunas,
  join = st_intersects,
  left = TRUE
)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# Preview the first rows of the joined table.
head(airbnb_con_comunas)
## Simple feature collection with 6 features and 28 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -58.41829 ymin: -34.62068 xmax: -58.37914 ymax: -34.59101
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
## room_id host_id room_type country city neighborhood
## 1 15125458 95870458 Private room NA NA NA
## 2 1691316 3380366 Private room NA NA NA
## 3 16069975 104686791 Private room NA NA NA
## 4 4470484 2034113 Private room NA NA NA
## 5 3564816 17949594 Private room NA NA NA
## 6 4479962 8875440 Private room NA NA NA
## address reviews overall_satisfaction
## 1 Recoleta, Buenos Aires 23 4.5
## 2 Palermo, Buenos Aires 106 5.0
## 3 Recoleta, Buenos Aires 5 5.0
## 4 Buenos Aires 102 4.5
## 5 San Nicolás, Buenos Aires 20 4.5
## 6 Balvanera, Буэнос-Айрес 102 4.5
## accommodates bedrooms bathrooms price deleted minstay
## 1 1 1 NA 339 0 NA
## 2 2 1 NA 559 0 NA
## 3 1 1 NA 254 0 NA
## 4 2 1 NA 441 0 NA
## 5 1 1 NA 390 0 NA
## 6 2 1 NA 424 0 NA
## last_modified survey_id
## 1 2017-07-03T17:52:23Z 1
## 2 2017-07-03T17:52:26Z 1
## 3 2017-07-03T17:52:26Z 1
## 4 2017-07-03T17:52:26Z 1
## 5 2017-07-03T17:52:26Z 1
## 6 2017-07-03T17:52:26Z 1
## location coworker_hosted
## 1 0101000020E61000000FB743C362324DC0B1DEA815A64B41C0 NA
## 2 0101000020E6100000780E65A88A354DC08A027D224F4C41C0 NA
## 3 0101000020E610000002D4D4B2B5324DC0B5C2F4BD864C41C0 NA
## 4 0101000020E6100000187C9A9317314DC04293C492724F41C0 NA
## 5 0101000020E61000004FC939B187304DC03D29931ADA4C41C0 NA
## 6 0101000020E6100000A6272CF180324DC0A88FC01F7E4E41C0 NA
## extra_host_languages name property_type
## 1 {en} Habitacion privada en Recoleta Apartment
## 2 {en} Palermo Cozy room wprivate bathroom House
## 3 {en} Habitacion Recoleta, Confort Condominium
## 4 {} The Flan Room- Palacio Nr San Telmo House
## 5 {en} "Antique" room @ El Centro Apartment
## 6 {} Private room - Congreso House
## currency rate_type
## 1 ARS nightly
## 2 ARS nightly
## 3 ARS nightly
## 4 ARS nightly
## 5 ARS nightly
## 6 ARS nightly
## barrios
## 1 RECOLETA
## 2 PALERMO
## 3 RECOLETA
## 4 CONSTITUCION - MONSERRAT - PUERTO MADERO - RETIRO - SAN NICOLAS - SAN TELMO
## 5 CONSTITUCION - MONSERRAT - PUERTO MADERO - RETIRO - SAN NICOLAS - SAN TELMO
## 6 BALVANERA - SAN CRISTOBAL
## perimetro area comunas geometry
## 1 21246.61 6140873 2 POINT (-58.39364 -34.59101)
## 2 21768.07 15772496 14 POINT (-58.41829 -34.59616)
## 3 21246.61 6140873 2 POINT (-58.39617 -34.59786)
## 4 35572.65 17802807 1 POINT (-58.38353 -34.62068)
## 5 35572.65 17802807 1 POINT (-58.37914 -34.60041)
## 6 10486.26 6385991 3 POINT (-58.39456 -34.61322)
Con `st_set_geometry(NULL)` le saco lo geográfico/geométrico.
# Count listings per comuna, producing a plain (non-spatial) table with the
# columns `comunas` and `cantidad`.
#
# The geometry is dropped *before* aggregating: summarise() on an sf object
# also unions the geometries of each group, which is wasted work here because
# the geometry is discarded anyway.
#
# Listings with NA in `comunas` end up in their own NA group.
conteo <- airbnb_con_comunas %>%
  st_set_geometry(NULL) %>%
  group_by(comunas) %>%
  summarise(cantidad = n())
## Warning: Factor `comunas` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `comunas` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Hago la unión entre la tabla `conteo` y `comunas`, que es espacial; lo puedo hacer con un `left_join()`, y el resultado lo guardo en `comunas`, que ya existía.
# Attach the per-comuna listing count to the polygons. The key is named
# explicitly so the join cannot silently pick up any other column the two
# tables happen to share.
comunas <- left_join(comunas, conteo, by = "comunas")
## Joining, by = "comunas"
# Choropleth: fill each comuna polygon by its number of listings.
ggplot(data = comunas, mapping = aes(fill = cantidad)) +
  geom_sf()