This compares the overlap between two different search configurations for Airbnb:
Surprisingly small overlap in properties (62) between the two configurations.
# Run the shared exploration script first.
# NOTE(review): presumably this is where `airbnb`, `airbnb_readin()` and
# `airbnb_dedup()` come from -- confirm against explore.R.
source(here::here("scrapy-splash/explore.R"))

# Read the second Airbnb scrape, tag every row with its search filter and
# site, and de-duplicate it, all in one pipeline.
airbnb2 <- airbnb_readin("scrapy-splash/airbnb-dates/airbnb_dates_cville.csv") %>%
  mutate(
    filters = "map_date",
    site = "airbnb"
  ) %>%
  airbnb_dedup() # 179

# How many room ids of the second scrape also occur in the first one ...
sum(is.element(airbnb2$room_id, airbnb$room_id))
## [1] 68
# ... and the same membership count taken from the other side.
sum(is.element(airbnb$room_id, airbnb2$room_id))
## [1] 68
# Stack the two Airbnb result sets into one table. Rows coming from `airbnb`
# are tagged with filters = "map"; rows from `airbnb2` keep their own tag.
both <- airbnb %>%
  mutate(filters = "map") %>%
  bind_rows(airbnb2) %>%
  mutate(
    site = fct_rev(site),
    # seq_len() gives an empty index for an empty table, whereas 1:nrow(.)
    # would produce c(1, 0) and break the mutate.
    row_number = seq_len(nrow(.)),
    # HTML snippet stored per row (used as the marker popup on the map).
    content = glue::glue("<a href='{url}'>{filters}--{room_id}</a>
<br>
row-num:{row_number}")
  ) %>%
  # Drop columns that are NA in every row.
  select_if(~ !all(is.na(.)))
# Keep only the (longitude, latitude, room_id) combinations that occur more
# than once in the stacked table, then report how often each one occurs.
both %>%
  group_by(longitude, latitude, room_id) %>%
  filter(n() > 1) %>%
  count() # 62 but 68 by lon/lat alone
## # A tibble: 62 x 4
## # Groups: longitude, latitude, room_id [62]
## longitude latitude room_id n
## <dbl> <dbl> <chr> <int>
## 1 -78.5 38.0 4151622 2
## 2 -78.5 38.0 24391450 2
## 3 -78.5 38.0 19912330 2
## 4 -78.5 38.0 4961336 2
## 5 -78.5 38.0 12683942 2
## 6 -78.5 38.0 11733216 2
## 7 -78.5 38.0 20846106 2
## 8 -78.5 38.0 15480263 2
## 9 -78.5 38.0 23336579 2
## 10 -78.5 38.0 22097621 2
## # … with 52 more rows
# may be able to learn about the geocoding algorithm by investigating semi-duplicates
# de-duplicate
# A listing that appears more than once for the same longitude, latitude and
# room_id was returned by both searches: relabel it "both" and keep one row.
both %<>%
  group_by(longitude, latitude, room_id) %>%
  # n() is a single value per group, so a plain if/else is enough; the
  # length-one "both" is recycled across the rows of the group.
  mutate(filters = if (n() > 1) "both" else filters) %>%
  slice(1) %>%
  # Drop the grouping so later steps work on a plain, ungrouped table.
  ungroup()
# both$filters %>% as.factor() %>% fct_relevel("map")
# Horizontal stacked bar of the number of unique listings per search filter.
both$filters %>%
  tabyl() %>%
  # The piped tabyl() output carries its category column under the name `.`;
  # give it a readable name before plotting.
  rename(., filters = `.`) %>%
  mutate(filters = as.factor(filters)) %>%
  ggplot(mapping = aes(x = "", y = n, fill = filters)) +
  geom_col(color = "black", alpha = 0.25) +
  geom_label(
    mapping = aes(label = n),
    position = "stack",
    hjust = 1,
    size = 6,
    alpha = 0.5,
    show.legend = FALSE
  ) +
  coord_flip() +
  scale_fill_viridis_d() +
  labs(
    title = "Unique Airbnb rentals",
    x = NULL,
    y = "#",
    fill = "Search filters"
  )
# Interactive map of the unique listings, outlined by the search filter(s)
# that returned them; clicking a marker shows its `content` popup.
# NOTE(review): sfize() is only defined further down in this file, so it is
# presumably also provided by the sourced explore.R -- confirm.
pal <- colorFactor(palette = "viridis", domain = as.factor(both$filters))
both %>%
  sfize() %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    radius = 5,
    color = ~pal(filters),
    fillColor = "black",
    popup = ~content
  ) %>%
  addLegend(position = "topright", pal = pal, values = ~filters)
Very low overlap with areas zoned non-residential (all zones excluding R-1, R-1S, R-2, R-2U, R-3)
# Read the zoning polygons from the open-data GeoJSON feed and keep only the
# zones whose code does not start with "R".
zoning <- filter(
  read_sf("https://opendata.arcgis.com/datasets/b06e72d50d0f4715b812c1fd4a04184d_49.geojson"),
  !grepl("^R", ZONE)
)
# Wrapper for sf object conversion: builds point geometries from the
# `longitude` / `latitude` columns of `value` and assigns them the CRS of
# the global `zoning` layer (looked up when the function is called).
sfize <- function(value) {
  points <- st_as_sf(value, coords = c("longitude", "latitude"))
  st_set_crs(points, st_crs(zoning))
}
# Total polygon area per zone code. The summary column is named explicitly
# with the same label an unnamed sum(area) would receive.
zoning %>%
  mutate(area = st_area(geometry)) %>%
  group_by(ZONE) %>%
  summarise(`sum(area)` = sum(area))
## Simple feature collection with 17 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -78.51985 ymin: 38.01142 xmax: -78.4547 ymax: 38.05947
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
## # A tibble: 17 x 3
## ZONE `sum(area)` geometry
## * <chr> [m^2] <MULTIPOLYGON [°]>
## 1 " " 68184.7586 (((-78.49923 38.01248, -78.49941 38.0125, -78.49955 38.012…
## 2 "B-1" 92830.6656 (((-78.48658 38.02387, -78.48576 38.02329, -78.48615 38.02…
## 3 "B-2" 2334.2045 (((-78.51934 38.02547, -78.5193 38.02572, -78.51927 38.025…
## 4 "B-3" 66732.0086 (((-78.5018 38.02945, -78.50164 38.02966, -78.5013 38.03, …
## 5 "CC" 636.3929 (((-78.46096 38.03329, -78.46113 38.0335, -78.46118 38.033…
## 6 "CH" 2025.6470 (((-78.49336 38.02803, -78.49332 38.02811, -78.49356 38.02…
## 7 "D" 4763.8640 (((-78.48236 38.03257, -78.48259 38.03204, -78.48193 38.03…
## 8 "DN" 25593.1257 (((-78.47721 38.03226, -78.47731 38.03229, -78.47738 38.03…
## 9 "HW" 35324.3861 (((-78.46902 38.01536, -78.46909 38.01539, -78.46954 38.01…
## 10 "IC" 407.2787 (((-78.48331 38.03821, -78.48304 38.03845, -78.48296 38.03…
## 11 "M-I" 34831.7322 (((-78.46964 38.02452, -78.46954 38.02446, -78.46946 38.02…
## 12 "MR" 47597.3650 (((-78.49365 38.02201, -78.49344 38.02228, -78.49315 38.02…
## 13 "PUD" 29218.5966 (((-78.4993 38.013, -78.49945 38.0129, -78.49985 38.01294,…
## 14 "UHD" 21326.6566 (((-78.50164 38.02966, -78.5013 38.03, -78.50157 38.03012,…
## 15 "URB" 145440.1253 (((-78.51135 38.04391, -78.50965 38.04316, -78.50845 38.04…
## 16 "UV" 388381.4460 (((-78.51925 38.02852, -78.51918 38.02852, -78.51909 38.02…
## 17 "WMN" 108.2400 (((-78.49162 38.03239, -78.49158 38.03236, -78.49156 38.03…
# Combine the de-duplicated Airbnb listings with the VRBO listings, convert
# them to sf points, and flag whether each point falls inside one of the
# `zoning` polygons.
rentals <- bind_rows(both, vrbo) %>% # 480
  sfize() %>%
  mutate(
    # Index of the zoning polygon containing the point; NA when none does.
    zone_id = as.numeric(st_within(., zoning)),
    zone_bool = if_else(
      is.na(zone_id),
      "residential or unzoned",
      "non-residential"
    ),
    # Popup HTML linking back to the listing.
    content = glue::glue("<a href='{url}'>{site}--{room_id}</a>")
  )
# Horizontal stacked bar of rentals per site, split by zoning flag.
rentals %>%
  # Drop the geometry column so the cross-tabulation works on a plain table.
  st_set_geometry(NULL) %>%
  tabyl(zone_bool, site) %>%
  # One row per (zone_bool, site) pair: `name` holds the site, `value` the count.
  pivot_longer(cols = -zone_bool) %>%
  mutate(zone_bool = fct_rev(zone_bool)) %>%
  ggplot(mapping = aes(x = name, y = value, fill = zone_bool)) +
  geom_col(color = "black", alpha = 0.25) +
  geom_label(
    mapping = aes(label = value),
    position = "stack",
    hjust = 1,
    size = 6,
    alpha = 0.5,
    show.legend = FALSE
  ) +
  coord_flip() +
  scale_fill_viridis_d(option = "A") +
  labs(
    title = "Zoning of all rentals",
    x = "Site",
    y = "#",
    fill = "Zoning"
  )
# Map of the non-residential zoning polygons (dark fill, labelled with their
# zone code) with every rental drawn on top as a circle marker whose popup
# shows its `content` link.
leaflet(zoning) %>%
  addTiles() %>%
  addPolygons(
    label = ~ZONE,
    # `fill` is a logical on/off switch in addPolygons(); the fill colour
    # itself is set through `fillColor`.
    fillColor = "#000004FF",
    color = "#000004FF",
    fillOpacity = .8
  ) %>%
  addCircleMarkers(
    data = rentals,
    fillColor = "#000000",
    color = "#FDE725FF",
    radius = 5,
    popup = ~content,
    fillOpacity = .5
  )