library(here)
library(glue)
library(janitor)
library(sf)
library(leaflet)
library(magrittr)
library(tidyverse)
# Read the GeoJSON layer published on the ArcGIS open-data portal into an
# sf object.
tracts <-
  "https://opendata.arcgis.com/datasets/63f965c73ddf46429befe1132f7f06e2_15.geojson" %>%
  read_sf()
# Airbnb ------------------------------------------------------------------
# Load the scraped Airbnb listings, tidy the column names, and add the
# listing URL plus a `site` tag so the rows can later be stacked with VRBO.
airbnb <- read_csv(here("scrapy-splash/airbnb/airbnb_cville.csv")) %>%
  clean_names() %>%
  mutate(
    # Convert the id to character first: `url` below is built from it.
    room_id = as.character(room_id),
    url = paste0("https://www.airbnb.com/rooms/", room_id),
    site = "AirBnB"
  ) %>%
  # Use the same column name as the VRBO table (`unit_type`).
  rename(unit_type = bedroom_type)
# lots of dups; multiple price points for the same roomID
# Collapse to one row per room_id: every row in a group gets the group's mean
# price, then only the first row of each group is kept.
airbnb <- airbnb %>%
  group_by(room_id) %>%
  mutate(price = mean(price)) %>%
  slice(1) %>%
  # Drop the grouping explicitly so later steps see a plain tibble.
  ungroup()
# VRBO --------------------------------------------------------------------
# this is a little messier bc I was a lot lazier in the scraper-side processing
# Load the scraped VRBO listings, drop rows without a longitude, and unpack
# the comma-delimited `details` string into one column per attribute.
vrbo <- read_csv(here("scrapy-splash/vrbo/vrbo_cville.csv")) %>%
  clean_names() %>%
  filter(!is.na(longitude)) %>%
  mutate(
    # Normalise `details` so that it can be split on "," below.
    details = details %>%
      str_remove(", ,sq. ft.") %>%
      # Insert a "0" field between a non-digit run and ",Sleeps"
      # (presumably a placeholder for a missing sq_ft value -- TODO confirm).
      str_replace("(\\D+),Sleeps", "\\1,0,Sleeps") %>%
      # Fold the half-bath count into the preceding field as "<n>--<m>".
      # NOTE(review): a field still containing "--" becomes NA in the
      # as.numeric() step at the end -- confirm that is intended.
      str_replace(",Half Baths: ", "--") %>%
      str_replace(": Studio", ": 1") %>%
      str_remove(" nights$") %>%
      # Replace every ",<non-digits>: " label with a bare ",".
      str_replace_all(",\\D+\\: ", ",")
  ) %>%
  separate(
    details,
    c("unit_type", "sq_ft", "num_guests", "num_beds", "num_baths", "min_nights"),
    sep = ","
  ) %>%
  mutate(
    num_reviews = str_remove(num_reviews, " ?Reviews"),
    # Keep only the score that precedes the "/" in the rating text.
    rating = str_replace(rating, ".* (\\d.?\\d?)/.*", "\\1"),
    url = paste0("https://www.vrbo.com/", room_id),
    site = "VRBO"
  ) %>%
  # Everything above is still text; coerce the numeric-looking columns.
  mutate(across(c(matches("^num"), sq_ft, rating), as.numeric))
# Both --------------------------------------------------------------------
# Stack the Airbnb and VRBO tables and add a row index plus an HTML snippet
# (`content`) that links to each listing.
both <- bind_rows(airbnb, vrbo) %>%
  mutate(
    # Turn `site` into a factor with its level order reversed.
    site = fct_rev(site),
    # seq_len() stays valid for an empty table, where 1:nrow(.) would give
    # c(1, 0) and make mutate() fail.
    row_number = seq_len(nrow(.)),
    content = glue::glue("<a href='{url}'>{site}--{room_id}</a>
<br>
row-num:{row_number}")
  )
# Sanity check: list any longitude/latitude pair shared by more than one
# listing. The printed result (kept below) is empty.
both %>%
  group_by(longitude, latitude) %>%
  summarise(n = n(), .groups = "keep") %>%
  filter(n > 1) # no dupes by coordinates!
## # A tibble: 0 x 3
## # Groups: longitude, latitude [0]
## # … with 3 variables: longitude <dbl>, latitude <dbl>, n <int>
# Export the combined listings. The path is relative to the current working
# directory (the reads above go through here()).
both %>%
  write_csv("scraped_rentals.csv")
# Plots -------------------------------------------------------------------