Airbnb search filters

This compares the overlap between two different search configurations for Airbnb:

Surprisingly small overlap in properties (62) between the two configurations.

Code


source(here::here("scrapy-splash/explore.R"))


# Read the airbnb_dates_cville.csv scrape, label its search configuration
# and site, then de-duplicate it, all in one pipeline.
airbnb2 <- "scrapy-splash/airbnb-dates/airbnb_dates_cville.csv" %>%
  airbnb_readin() %>%
  mutate(
    filters = "map_date",
    site = "airbnb"
  ) %>%
  airbnb_dedup() # 179

# Count the room_ids shared by the two scrapes, checked in both directions
sum(is.element(airbnb2$room_id, airbnb$room_id))
## [1] 68
sum(is.element(airbnb$room_id, airbnb2$room_id))
## [1] 68

# Stack the two Airbnb scrapes into one table. Rows from `airbnb` are
# labelled filters = "map"; rows from `airbnb2` keep the label they already
# carry. Each row also gets a running row number and an HTML link string.
both <- airbnb %>%
  mutate(filters = "map") %>%
  bind_rows(airbnb2) %>%
  mutate(site = fct_rev(site),
         # seq_len() stays empty for a zero-row table, whereas 1:nrow(.)
         # would produce c(1, 0)
         row_number = seq_len(nrow(.)),
         content = glue::glue("<a href='{url}'>{filters}--{room_id}</a>
                              <br>
                              row-num:{row_number}")) %>%
  # drop columns that contain nothing but NA
  select_if(~ !all(is.na(.)))

# List every (longitude, latitude, room_id) combination that appears more
# than once, together with how many rows share it.
both %>%
  group_by(longitude, latitude, room_id) %>%
  filter(n() > 1) %>%
  count() # 62 but 68 by lon/lat alone
## # A tibble: 62 x 4
## # Groups:   longitude, latitude, room_id [62]
##    longitude latitude room_id      n
##        <dbl>    <dbl> <chr>    <int>
##  1     -78.5     38.0 4151622      2
##  2     -78.5     38.0 24391450     2
##  3     -78.5     38.0 19912330     2
##  4     -78.5     38.0 4961336      2
##  5     -78.5     38.0 12683942     2
##  6     -78.5     38.0 11733216     2
##  7     -78.5     38.0 20846106     2
##  8     -78.5     38.0 15480263     2
##  9     -78.5     38.0 23336579     2
## 10     -78.5     38.0 22097621     2
## # … with 52 more rows
                # may be able to learn about the geocode algorithm by investigating semi-dupes

# de-duplicate: keep one row per (longitude, latitude, room_id) and relabel
# combinations that occur more than once as "both"
both %<>%
  group_by(longitude, latitude, room_id) %>%
  # n() is a single value per group, so a plain if/else is sufficient
  mutate(filters = if (n() > 1) "both" else filters) %>%
  slice(1) %>%
  # drop the grouping so later steps operate on the table as a whole
  ungroup()

Barchart


# both$filters %>% as.factor() %>% fct_relevel("map")

# Tabulate the search-filter labels and draw the counts as one stacked,
# horizontal bar with a labelled segment per filter value.
# tabyl() is called through the pipe so its first column is named `.`,
# which the rename below relies on.
both$filters %>%
  tabyl() %>%
  rename(filters = `.`) %>%
  mutate(filters = as.factor(filters)) %>%
  ggplot(aes(x = "", y = n, fill = filters)) +
  geom_col(color = "black", alpha = .25) +
  geom_label(
    aes(label = n),
    position = "stack",
    hjust = 1,
    size = 6,
    alpha = .5,
    show.legend = FALSE
  ) +
  scale_fill_viridis_d() +
  coord_flip() +
  labs(
    title = "Unique Airbnb rentals",
    fill = "Search filters",
    x = NULL,
    y = "#"
  )

Zoning of all rentals

Very low overlap with areas zoned non-residential (all zones excluding R-1, R-1S, R-2, R-2U, R-3)

Code



# Load the zoning polygons and keep only zones whose code does not start
# with "R".
zoning <-
  "https://opendata.arcgis.com/datasets/b06e72d50d0f4715b812c1fd4a04184d_49.geojson" %>%
  read_sf() %>%
  filter(!grepl("^R", ZONE))

# Convert a table with `longitude` / `latitude` columns into an sf object
# and give it the same CRS as `zoning` (looked up when the function runs).
sfize <- function(tbl) {
  as_points <- st_as_sf(tbl, coords = c("longitude", "latitude"))
  st_set_crs(as_points, st_crs(zoning))
}

# Total polygon area per remaining zone code
zoning %>%
  mutate(area = st_area(geometry)) %>%
  group_by(ZONE) %>%
  summarise(`sum(area)` = sum(area))
## Simple feature collection with 17 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -78.51985 ymin: 38.01142 xmax: -78.4547 ymax: 38.05947
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## # A tibble: 17 x 3
##    ZONE  `sum(area)`                                                    geometry
##  * <chr>       [m^2]                                          <MULTIPOLYGON [°]>
##  1 " "    68184.7586 (((-78.49923 38.01248, -78.49941 38.0125, -78.49955 38.012…
##  2 "B-1"  92830.6656 (((-78.48658 38.02387, -78.48576 38.02329, -78.48615 38.02…
##  3 "B-2"   2334.2045 (((-78.51934 38.02547, -78.5193 38.02572, -78.51927 38.025…
##  4 "B-3"  66732.0086 (((-78.5018 38.02945, -78.50164 38.02966, -78.5013 38.03, …
##  5 "CC"     636.3929 (((-78.46096 38.03329, -78.46113 38.0335, -78.46118 38.033…
##  6 "CH"    2025.6470 (((-78.49336 38.02803, -78.49332 38.02811, -78.49356 38.02…
##  7 "D"     4763.8640 (((-78.48236 38.03257, -78.48259 38.03204, -78.48193 38.03…
##  8 "DN"   25593.1257 (((-78.47721 38.03226, -78.47731 38.03229, -78.47738 38.03…
##  9 "HW"   35324.3861 (((-78.46902 38.01536, -78.46909 38.01539, -78.46954 38.01…
## 10 "IC"     407.2787 (((-78.48331 38.03821, -78.48304 38.03845, -78.48296 38.03…
## 11 "M-I"  34831.7322 (((-78.46964 38.02452, -78.46954 38.02446, -78.46946 38.02…
## 12 "MR"   47597.3650 (((-78.49365 38.02201, -78.49344 38.02228, -78.49315 38.02…
## 13 "PUD"  29218.5966 (((-78.4993 38.013, -78.49945 38.0129, -78.49985 38.01294,…
## 14 "UHD"  21326.6566 (((-78.50164 38.02966, -78.5013 38.03, -78.50157 38.03012,…
## 15 "URB" 145440.1253 (((-78.51135 38.04391, -78.50965 38.04316, -78.50845 38.04…
## 16 "UV"  388381.4460 (((-78.51925 38.02852, -78.51918 38.02852, -78.51909 38.02…
## 17 "WMN"    108.2400 (((-78.49162 38.03239, -78.49158 38.03236, -78.49156 38.03…

# Combine the Airbnb and VRBO listings, convert them to sf points, and flag
# each point by whether it falls inside one of the `zoning` polygons.
rentals <- bind_rows(both, vrbo) %>% #480
  sfize() %>%
  mutate(
    # index of the containing zoning polygon, NA when the point is in none
    # NOTE(review): presumably fails if a point lies within more than one
    # polygon; confirm the zones do not overlap
    zone_id = st_within(., zoning) %>% as.numeric(),
    zone_bool = ifelse(
      is.na(zone_id),
      "residential or unzoned",
      "non-residential"
    ),
    content = glue::glue("<a href='{url}'>{site}--{room_id}</a>")
  )

Plots

# Cross-tabulate zoning flag by site and plot the counts as stacked,
# horizontal bars (one bar per site).
rentals %>%
  st_set_geometry(NULL) %>%
  tabyl(zone_bool, site) %>%
  pivot_longer(cols = -zone_bool) %>%
  mutate(zone_bool = fct_rev(zone_bool)) %>%
  ggplot(aes(x = name, y = value, fill = zone_bool)) +
  geom_col(color = "black", alpha = .25) +
  geom_label(
    aes(label = value),
    position = "stack",
    hjust = 1,
    size = 6,
    alpha = .5,
    show.legend = FALSE
  ) +
  scale_fill_viridis_d(option = "A") +
  coord_flip() +
  labs(
    title = "Zoning of all rentals",
    fill = "Zoning",
    x = "Site",
    y = "#"
  )

Map of non-residential zones (black) and rentals (yellow)

# Interactive map: the `zoning` polygons drawn in near-black, with the
# rentals overlaid as circle markers that have a yellow outline and open
# their `content` link on click.
leaflet(zoning) %>%
  addTiles() %>%
  # `fill` is leaflet's on/off switch for filling a shape; the fill colour
  # itself is given through `fillColor`
  addPolygons(
    label = ~ZONE,
    color = "#000004FF",
    fillColor = "#000004FF",
    fillOpacity = .8
  ) %>%
  addCircleMarkers(
    data = rentals,
    color = "#FDE725FF",
    fillColor = "#000000",
    fillOpacity = .5,
    radius = 5,
    popup = ~content
  )