Geocoding is the process of converting an address to spatial coordinates. Google provides a geocoding API that does this for you; however, it limits free usage to 2,500 addresses a day.

I’ve written this script to gather the coordinates over time, one daily batch at a time. Google will let you geocode as many addresses as you want if you pay; at the time of writing, the price is $4-5 per 1,000 records.

Setup

library(ggmap)
library(tidyverse)

# Domo dataset ID of the "Orders by Street Address" dataset.
input_ds <- '3c2bf79f-8e8d-4600-9732-62941a47215c'

# Pull the order data from Domo.
customers <- DomoR::fetch(input_ds)

# Addresses saved by earlier runs. The first column is dropped because
# write.csv() stores the row names there when this file is written.
queriedCustomers <- read.csv("queriedAdresses.csv") %>% select(-1)

# Coordinates already on file, one row per saved record. NA coordinates are
# replaced with 0 so that, after the join below, those customers are not
# selected by filter(is.na(lat)); using drop_na(lat, lon) here instead would
# make them eligible to be queried again.
# Plain mutate() + replace() is used rather than mutate_at(..., funs(...))
# because funs() is deprecated in dplyr.
latlongs <- queriedCustomers %>%
        mutate(lat = replace(lat, is.na(lat), 0),
               lon = replace(lon, is.na(lon), 0)) %>%
        select(customers_id, lat, lon)

# One row per customer address with its total order revenue, sorted with the
# highest revenue first, keeping only rows with no latitude on file yet.
# NOTE(review): the join is by customers_id only, so a customer with several
# addresses is treated as done once any one of them has coordinates - confirm
# that this is intended.
customersNoLatLon <- customers %>%
        group_by(customers_id,
                 entry_street_address,
                 entry_city,
                 entry_state,
                 entry_postcode) %>%
        summarize(ttlRev = sum(orders_total)) %>%
        arrange(desc(ttlRev)) %>%
        left_join(latlongs, by = "customers_id") %>%
        filter(is.na(lat)) %>%
        ungroup()

Query Loop

Loop through as many addresses as my daily query allocation allows.

# Value returned by geocodeQueryCheck(), used as the upper bound on how many
# addresses to geocode in this run (presumably the number of queries left in
# today's allocation - confirm against the installed ggmap version).
queriesRemaining <- geocodeQueryCheck()

# Never loop past the last pending address, and do nothing at all when either
# the remaining allocation or the pending list is empty. seq_len(0) is an
# empty sequence, whereas 1:0 would iterate over c(1, 0).
nToQuery <- min(queriesRemaining, nrow(customersNoLatLon))

for (i in seq_len(nToQuery)) {
        # Build a single-line address: "<street> <city>, <state> <postcode>".
        address <- paste0(customersNoLatLon$entry_street_address[i],
                          " ",
                          customersNoLatLon$entry_city[i],
                          ", ",
                          customersNoLatLon$entry_state[i],
                          " ",
                          customersNoLatLon$entry_postcode[i])
        geo <- geocode(address, output = "latlon")

        # Store the first returned coordinate pair on the customer's row.
        customersNoLatLon$lon[i] <- geo$lon[1]
        customersNoLatLon$lat[i] <- geo$lat[1]
}

# Report the query allocation again now that the loop has run.
geocodeQueryCheck()

Save to Disk

Save the known lat/longs so I can build onto the file over time.

# Rows that have a latitude after the query loop.
newAdditions <- customersNoLatLon %>%
        filter(!is.na(lat)) %>%
        ungroup()

# Append them to the records loaded from disk.
queriedCustomersNew <- rbind(queriedCustomers, newAdditions)

# Overwrite the saved file. Row names are written as the first column, which
# is the column dropped with select(-1) when the file is read back in.
write.csv(queriedCustomersNew, "queriedAdresses.csv")

Plot

Now it’s easy to plot the customers. We can also use latitude and longitude as a selection filter to look at specific geographic areas and drill down into any level of geography.

# Saved records that have both a latitude and a longitude.
dataForMap <- drop_na(queriedCustomers, lat, lon)

# Base layer: white state polygons with black borders in a conic projection.
usamap <- ggplot(map_data("state"), aes(long, lat, group = group)) +
        geom_polygon(fill = "white", colour = "black") +
        coord_map("conic", lat0 = 30)

# Bounding box of the base layer, read from the data stored on the plot.
longmin <- min(usamap$data$long)
longmax <- max(usamap$data$long)
latmin <- min(usamap$data$lat)
latmax <- max(usamap$data$lat)

# Keep only the records strictly inside that bounding box.
dataForMapUSA <- filter(dataForMap,
                        lon > longmin & lon < longmax,
                        lat > latmin & lat < latmax)

# Customer locations, every point drawn the same way. group = NULL overrides
# the group aesthetic inherited from the base layer.
usamap +
        geom_point(aes(x = lon, y = lat, group = NULL),
                   data = dataForMapUSA) +
        theme(legend.position = "none")

# Same map with point transparency mapped to ttlRev.
usamap +
        geom_point(aes(x = lon, y = lat, alpha = ttlRev, group = NULL),
                   data = dataForMapUSA) +
        theme(legend.position = "none")

# Base layer: California county polygons in a conic projection.
cali <- ggplot(map_data("county", region = "california"),
               aes(long, lat, group = group)) +
        geom_polygon(fill = "white", colour = "black") +
        coord_map("conic", lat0 = 30)

# Bounding box of the California layer, read from the data stored on the plot.
longmin <- min(cali$data$long)
longmax <- max(cali$data$long)
latmin <- min(cali$data$lat)
latmax <- max(cali$data$lat)

# Saved records that have both a latitude and a longitude.
dataForMap <- drop_na(queriedCustomers, lat, lon)

# Keep only the records strictly inside the bounding box.
dataForMapCali <- filter(dataForMap,
                         lon > longmin & lon < longmax,
                         lat > latmin & lat < latmax)

# Customer locations with point transparency mapped to ttlRev. group = NULL
# overrides the group aesthetic inherited from the base layer.
cali +
        geom_point(aes(x = lon, y = lat, alpha = ttlRev, group = NULL),
                   data = dataForMapCali) +
        theme(legend.position = "none")

# Base layer: the Los Angeles and Orange county polygons in a conic
# projection. Other counties that could be added to the selection:
# "ventura", "san bernardino", "riverside".
greaterLosAngles <- ggplot(filter(map_data("county", region = "california"),
                                  subregion %in% c("los angeles", "orange")),
                           aes(long, lat, group = group)) +
        geom_polygon(fill = "white", colour = "black") +
        coord_map("conic", lat0 = 30)

# Bounding box of the selected counties, read from the data stored on the plot.
longmin <- min(greaterLosAngles$data$long)
longmax <- max(greaterLosAngles$data$long)
latmin <- min(greaterLosAngles$data$lat)
latmax <- max(greaterLosAngles$data$lat)

# Saved records that have both a latitude and a longitude.
dataForMap <- drop_na(queriedCustomers, lat, lon)

# Keep only the records strictly inside the bounding box.
dataForMapgreaterLosAngles <- filter(dataForMap,
                                     lon > longmin & lon < longmax,
                                     lat > latmin & lat < latmax)

# Customer locations with point transparency mapped to ttlRev. group = NULL
# overrides the group aesthetic inherited from the base layer.
greaterLosAngles +
        geom_point(aes(x = lon, y = lat, alpha = ttlRev, group = NULL),
                   data = dataForMapgreaterLosAngles) +
        theme(legend.position = "none")

library(leaflet)

# Interactive tile map of the records inside the Los Angeles / Orange county
# bounding box; each marker's popup shows the customer ID.
addTiles(leaflet()) %>%
        addCircleMarkers(data = dataForMapgreaterLosAngles,
                         lng = ~lon,
                         lat = ~lat,
                         radius = 2,
                         popup = ~paste(customers_id))