# Geocoding is the process of converting an address to spatial coordinates. Google provides a geocoding API that does this for you; however, it limits free usage to 2,500 addresses a day.
# I’ve written this script to gather the coordinates over time. Google will let you geocode as many addresses as you want if you pay; their prices today are $4-5 per 1,000 records.
library(ggmap)
library(tidyverse)
# ---- Load orders and work out which customers still need geocoding ----

# Identifier of the dataset fetched through DomoR.
input_ds <- '3c2bf79f-8e8d-4600-9732-62941a47215c' #Orders by Street Address
customers <- DomoR::fetch(input_ds)

# Results accumulated by earlier runs. The first column is dropped on read;
# presumably it is the row-name column that write.csv() adds by default.
queriedCustomers <- read.csv("queriedAdresses.csv") %>%
  select(-1)

# Coordinates already on file, keyed by customer.
# A missing lat/lon is recoded to 0 so that, after the join below, the
# is.na(lat) filter only keeps customers that are absent from the file.
# (Using drop_na(lat, lon) here instead would put those rows back in the queue.)
latlongs <- queriedCustomers %>%
  mutate(
    lat = replace(lat, is.na(lat), 0),
    lon = replace(lon, is.na(lon), 0)
  ) %>%
  select(customers_id, lat, lon)

# One row per customer/address with total revenue, highest revenue first,
# restricted to customers that have no entry in `latlongs` yet.
customersNoLatLon <- customers %>%
  group_by(
    customers_id,
    entry_street_address,
    entry_city,
    entry_state,
    entry_postcode
  ) %>%
  summarize(ttlRev = sum(orders_total)) %>%
  ungroup() %>%
  arrange(desc(ttlRev)) %>%
  left_join(latlongs, by = "customers_id") %>%
  filter(is.na(lat))

# Loop to do my daily allocated queries.
# ---- Geocode as many queued customers as today's quota allows ----

# Value returned by ggmap's geocodeQueryCheck(); used as the number of
# queries we may still issue.
# NOTE(review): confirm this function still returns the remaining count in
# the installed ggmap version.
queriesRemaining <- geocodeQueryCheck()

# Never run past the end of the queue, and run zero iterations (instead of
# the indices 1, 0 that `1:0` would give) when no quota or no rows are left.
nToGeocode <- max(0, min(queriesRemaining, nrow(customersNoLatLon)))

for (i in seq_len(nToGeocode)) {
  # "<street> <city>, <state> <postcode>"
  address <- paste0(
    customersNoLatLon$entry_street_address[i],
    " ",
    customersNoLatLon$entry_city[i],
    ", ",
    customersNoLatLon$entry_state[i],
    " ",
    customersNoLatLon$entry_postcode[i]
  )
  geo <- geocode(address, output = "latlon")

  # Only the first match is kept.
  customersNoLatLon$lon[i] <- geo$lon[1]
  customersNoLatLon$lat[i] <- geo$lat[1]
}

# Check the query allowance again now that the loop has run.
geocodeQueryCheck()

# Save the known lat/longs so I can build onto the file over time.
# ---- Append today's successful lookups to the file on disk ----

# Queue rows that now have a latitude.
newAdditions <- customersNoLatLon %>%
  ungroup() %>%
  filter(!is.na(lat))

# Stack the new rows under the ones already on file.
queriedCustomersNew <- queriedCustomers %>%
  rbind(newAdditions)

# Row names are written on purpose (the write.csv() default): the read step
# at the top of the script drops the first column of this file.
write.csv(queriedCustomersNew, "queriedAdresses.csv")

# Now it’s easy to plot the customers. We can also use lat long as a
# selection filter to look at specific geographic areas and drill down into
# any level of geography.
# ---- Maps of geocoded customers ----

# Customers on file with both coordinates present; shared by every map below.
dataForMap <- queriedCustomers %>%
  drop_na(lat, lon)

# Draw region outlines (white fill, black border) on a conic projection.
# `polygons` is a data frame with long, lat and group columns, such as the
# ones produced by map_data(). Returns a ggplot object.
drawOutlineMap <- function(polygons) {
  ggplot(polygons, aes(long, lat, group = group)) +
    geom_polygon(fill = "white", colour = "black") +
    coord_map("conic", lat0 = 30)
}

# Keep the rows of `points` (columns lat, lon) that fall strictly inside the
# lat/long bounding box of the polygon data stored in `basemap`.
keepPointsOnMap <- function(points, basemap) {
  latRange <- range(basemap$data$lat)
  longRange <- range(basemap$data$long)
  points %>%
    filter(
      lat > latRange[1],
      lat < latRange[2],
      lon > longRange[1],
      lon < longRange[2]
    )
}

# Overlay customer points on `basemap`, with the legend hidden.
# alphaByRevenue = TRUE maps point transparency to ttlRev.
# `group = NULL` switches off the group aesthetic inherited from the base
# plot, since the point data has no `group` column.
plotCustomerPoints <- function(basemap, points, alphaByRevenue = TRUE) {
  pointAes <- if (alphaByRevenue) {
    aes(x = lon, y = lat, alpha = ttlRev, group = NULL)
  } else {
    aes(x = lon, y = lat, group = NULL)
  }
  basemap +
    geom_point(data = points, mapping = pointAes) +
    theme(legend.position = "none")
}

# -- United States, by state --
usamap <- drawOutlineMap(map_data("state"))
dataForMapUSA <- keepPointsOnMap(dataForMap, usamap)

# Plain points first, then the same map with transparency driven by revenue.
plotCustomerPoints(usamap, dataForMapUSA, alphaByRevenue = FALSE)
plotCustomerPoints(usamap, dataForMapUSA)

# -- California, by county --
cali <- drawOutlineMap(map_data("county", region = "california"))
dataForMapCali <- keepPointsOnMap(dataForMap, cali)
plotCustomerPoints(cali, dataForMapCali)

# -- Los Angeles and Orange counties --
# Candidates left out for now: "ventura", "san bernardino", "riverside".
greaterLosAngles <- map_data("county", region = "california") %>%
  filter(subregion %in% c("los angeles", "orange")) %>%
  drawOutlineMap()
dataForMapgreaterLosAngles <- keepPointsOnMap(dataForMap, greaterLosAngles)
plotCustomerPoints(greaterLosAngles, dataForMapgreaterLosAngles)

# -- Interactive version of the Los Angeles map --
library(leaflet)

# Small circle markers on the default tile layer; clicking a marker shows the
# customer id.
leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    data = dataForMapgreaterLosAngles,
    radius = 2,
    lng = ~ lon,
    lat = ~ lat,
    popup = ~paste(customers_id)
  )