This document records how the data was downloaded (scraped) from Redfin. The amount of data was small, so collecting it did not place much load on their servers. However, this is not a standard way to collect data and should not be used at large scale or too frequently.
# Session setup: set the project directory, load RSelenium, start a Selenium
# server and open a Firefox session driven through `remDr`.
# The relative output path used when the merged CSV is written depends on
# this working directory.
setwd('/Users/Nan/Documents/property/')
library(RSelenium)

# NOTE(review): checkForServer()/startServer() come from RSelenium; newer
# releases of the package may no longer provide them -- confirm against the
# installed version before re-running.
checkForServer()
startServer()

# Preferences handed to makeFirefoxProfile(): the Firefox binary location,
# a "never ask, save to disk" rule for text/csv, and the download folder
# settings (including the target directory for downloads).
firefox_prefs <- list(
  firefox.binary.path = "/Applications/Firefox.app/Contents/MacOS/firefox",
  browser.helperApps.neverAsk.saveToDisk = "text/csv",
  browser.download.folderList = 2,
  browser.download.manager.showWhenStarting = FALSE,
  browser.download.dir = "/Users/Nan/Documents/property/data_redfin"
)
firefox_profile <- makeFirefoxProfile(firefox_prefs)

# `remDr` is the browser handle used by the rest of the script.
remDr <- remoteDriver(
  browserName = "firefox",
  extraCapabilities = firefox_profile
)
remDr$open()
# Visit a grid of map centres and click Redfin's download link on each page.
#
# The grid covers latitude 47.6..47.78 and longitude -122.37..-122.27 in steps
# of 0.01. Every URL carries the same filter query string (num_beds=2,
# sold_within_days=2000, listing size 700..2000, zoomLevel=16) and differs
# only in its lat/long values.
#
# The loop is wrapped in tryCatch(..., finally = ) so the browser session is
# closed even when a navigation or click raises an error part-way through.
lat_grid <- seq(47.6, 47.78, by = 0.01)
lng_grid <- seq(-122.37, -122.27, by = 0.01)

tryCatch(
  {
    for (lat in lat_grid) {
      for (lng in lng_grid) {
        myurl <- paste0('https://www.redfin.com/city/16163/WA/Seattle/filter#uipt=1%2C2%2C3&sf=&num_beds=2&sold_within_days=2000&min_listing_approx_size=700&max_listing_approx_size=2000&lat=', lat, '&long=', lng, '&zoomLevel=16')
        remDr$navigate(myurl)

        # Fixed pause before looking for the link; presumably gives the page
        # time to finish rendering -- adjust if downloads are missed.
        Sys.sleep(10)

        # findElement() errors when no matching element exists; try() turns
        # that into a "try-error" object so grid cells without a download
        # link are skipped instead of aborting the whole run.
        download_link <- try(
          remDr$findElement(using = 'xpath', '//a[@class="downloadLink"]')
        )

        # inherits() is the safe class test: it works for objects carrying
        # several classes, where `class(x) != "try-error"` yields a vector.
        if (!inherits(download_link, "try-error")) {
          download_link$clickElement()
        }
      }
    }
  },
  finally = remDr$close()
)
# Merge every downloaded Redfin CSV into one de-duplicated table and save it.
#
# NOTE(review): the Firefox profile created earlier in this script sets
# browser.download.dir to "/Users/Nan/Documents/property/data_redfin", but the
# files are read from "/Users/Nan/Downloads/" here. The original read location
# is kept; confirm which folder actually receives the downloads.
download_dir <- "/Users/Nan/Downloads/"

# Files whose name matches the regular expression 'redfin.*'.
csv_names <- list.files(download_dir, pattern = 'redfin.*')

# Read each file once and bind them in a single call; growing a data frame
# with rbind() inside a loop re-copies the accumulated rows on every pass.
csv_tables <- lapply(csv_names, function(csv_name) {
  read.csv(paste0(download_dir, csv_name))
})

# With no matching files, fall back to an empty data frame so the export
# below still runs (do.call(rbind, list()) would return NULL).
redfin_rows <- if (length(csv_tables) > 0) {
  do.call(rbind, csv_tables)
} else {
  data.frame()
}

# Overlapping map tiles can return the same rows more than once, so exact
# duplicate rows are dropped before writing.
redfin_rows <- unique(redfin_rows)

# Relative path: resolved against the working directory set at the top of
# the script.
write.csv(redfin_rows, "data_redfin/seattle_2000day_2b_700min_2000max_160125.csv")