# 1) Import data -----------------------------------------------------------
# Label of the study area; reused in the console summary of the city filter.
city_name <- "Duluth, GA"
# We choose restaurant and korean restaurant as POI types.
included_types <- c("restaurant", "korean_restaurant")
# Path of the saved POI object; stop right away if the file is not there.
raw_path <- "duluth_places_restaurant_korean_restaurant_ALL.rds"
stopifnot(file.exists(raw_path))
poi_raw2 <- readRDS(file = raw_path)
# 2) Tidy data -------------------------------------------------------------
library(tidyr)
library(kableExtra)
# 2a. Remove duplicated rows (by Google Place 'id')
# Row count before de-duplication, so the number of dropped rows can be shown.
n0 <- nrow(poi_raw2)
# One row per 'id'; .keep_all = TRUE retains every other column.
poi_step1 <- poi_raw2 |>
  dplyr::distinct(id, .keep_all = TRUE)
cat(
  "(removed",
  n0 - nrow(poi_step1),
  ")\n"
)
## (removed 0 )
# 2b. Flatten/unnest list-columns and collapse 'types' to a single string
# One TRUE/FALSE flag per column: is the column stored as a list?
is_listcol <- vapply(poi_step1, is.list, logical(1))
# Names of the columns flagged above.
list_cols <- names(poi_step1)[is_listcol]
# Collapse the type labels of one row into a single "|"-separated string of
# their unique values.
#   x: the labels for one row (a vector coercible to character, or NULL).
# Returns NA_character_ when x is NULL, empty, or holds only missing values,
# so that a missing entry never becomes the literal string "NA".
collapse_types <- function(x) {
  if (is.null(x) || length(x) == 0) {
    return(NA_character_)
  }
  labels <- unique(as.character(x))
  # Drop missing labels before pasting; paste() would spell them out as "NA".
  labels <- labels[!is.na(labels)]
  if (length(labels) == 0) {
    return(NA_character_)
  }
  paste(labels, collapse = "|")
}
# Start the next step from the de-duplicated table.
poi_step2 <- poi_step1
# Collapse 'types' if present: each entry becomes one string via collapse_types().
if (any(names(poi_step2) == "types")) {
  poi_step2 <- dplyr::mutate(
    poi_step2,
    types = purrr::map_chr(types, collapse_types)
  )
}
# 2c. Handle missing values
key_cols <- c("id", "name", "formatted_address", "lat", "lon")
# Rows with an NA in any key column that exists in the table, before filtering.
missing_before <- sum(
  !stats::complete.cases(
    poi_step2[, intersect(key_cols, names(poi_step2)), drop = FALSE]
  )
)
# Keep rows with a non-missing id and coordinates, and a non-missing,
# non-empty name and address.
poi_step3 <- dplyr::filter(
  poi_step2,
  !is.na(id),
  !is.na(name), nzchar(name),
  !is.na(formatted_address), nzchar(formatted_address),
  !is.na(lat),
  !is.na(lon)
)
# Same count as above, taken after filtering.
missing_after <- sum(
  !stats::complete.cases(
    poi_step3[, intersect(key_cols, names(poi_step3)), drop = FALSE]
  )
)
cat(
  "[NA handling] Rows after drop NAs is",
  nrow(poi_step3),
  "\n"
)
## [NA handling] Rows after drop NAs is 148
# 2d. Filter by location: keep only POIs inside the city boundary
# The boundary is fetched only when 'city_ll_4326' is not already defined.
if (!exists("city_ll_4326")) {
  # progress_bar = FALSE keeps the download progress bar out of rendered
  # output, where it otherwise shows up as hundreds of partial bar strings.
  ga_places <- tigris::places(state = "GA", year = 2023, progress_bar = FALSE)
  city_ll <- subset(ga_places, NAME == "Duluth")
  # Exactly one matching place is expected; stop otherwise.
  stopifnot(nrow(city_ll) == 1)
  # Same CRS (EPSG:4326) as the POI points built from lon/lat.
  city_ll_4326 <- sf::st_transform(city_ll, 4326)
}
## (download progress bar output omitted)
# Build point geometries from lon/lat (EPSG:4326); remove = FALSE keeps the
# lon and lat columns in the table.
poi_step3_sf <- sf::st_as_sf(
  poi_step3,
  coords = c("lon", "lat"),
  crs = 4326,
  remove = FALSE
)
n_before_city <- nrow(poi_step3_sf)
# For every POI, the boundary features it lies within (none => outside).
inside <- sf::st_within(poi_step3_sf, city_ll_4326)
keep <- lengths(inside) >= 1L
poi_clean_sf <- poi_step3_sf[keep, ]
cat(
  "[City filter] Kept", nrow(poi_clean_sf), "of", n_before_city,
  "rows within", city_name, "\n"
)
## [City filter] Kept 142 of 148 rows within Duluth, GA
# Final tidy table: geometry removed, the most useful columns moved to the
# front, every remaining column kept after them.
poi_clean <- dplyr::select(
  sf::st_drop_geometry(poi_clean_sf),
  id,
  name,
  formatted_address,
  lat,
  lon,
  types,
  price_level,
  rating,
  user_ratings_total,
  dplyr::everything()
)
# 3) Show the cleaned POI data --------------------------------------------------
# Print the first 10 rows.
# Rendering stays best-effort (a failure does not stop the script), but the
# reason is reported instead of being silently discarded.
tryCatch(
  {
    poi_clean |>
      head(10) |>
      kableExtra::kable() |>
      kableExtra::kable_styling(full_width = FALSE)
  },
  error = function(e) {
    message("Could not render the POI preview table: ", conditionMessage(e))
    invisible(NULL)
  }
)
id | name | formatted_address | lat | lon | types | price_level | rating | user_ratings_total |
---|---|---|---|---|---|---|---|---|
ChIJAQvhCUCj9YgR99qEjEINX10 | Mazaj Atlanta | 3312 Peachtree Industrial Blvd #1, Duluth, GA 30096, USA | 34.01455 | -84.16455 | middle_eastern_restaurant|restaurant|food|point_of_interest|establishment | PRICE_LEVEL_MODERATE | 4.7 | 873 |
ChIJB9OJaz2Z9YgR4jtaD9WHVHw | QuikTrip | 3195 Peachtree Industrial Blvd, Duluth, GA 30097, USA | 34.02114 | -84.16085 | gas_station|convenience_store|meal_takeaway|breakfast_restaurant|liquor_store|food_store|restaurant|food|store|point_of_interest|establishment | PRICE_LEVEL_INEXPENSIVE | 4.1 | 94 |
ChIJQdg00piY9YgR5Q5tfiKUfT4 | Gokul Sweets | 4315 Abbotts Bridge Rd #3&4, Duluth, GA 30097, USA | 34.02076 | -84.16248 | indian_restaurant|vegan_restaurant|vegetarian_restaurant|dessert_restaurant|dessert_shop|confectionery|asian_grocery_store|grocery_store|food_store|restaurant|food|store|point_of_interest|establishment | PRICE_LEVEL_MODERATE | 3.7 | 1463 |
ChIJ00A6K8qj9YgRj9z38ovoDpY | Kang’s Table (강스테이블) Duluth | 3312 Peachtree Industrial Blvd Suite-5, Duluth, GA 30096, USA | 34.01422 | -84.16489 | restaurant|korean_restaurant|food|point_of_interest|establishment | PRICE_LEVEL_MODERATE | ?.2 | 133 |
ChIJE5Ar3P2Z9YgRB7-GQYg6aLQ | THE BAKE | 3294 Peachtree Industrial Blvd #1000, Duluth, GA 30096, USA | 34.01643 | -84.16289 | brunch_restaurant|coffee_shop|bakery|cafe|food_store|restaurant|food|store|point_of_interest|establishment | NA | 4.5 | 264 |
ChIJizPHG6CY9YgRLaAzAfFbE2Q | Kurt’s Euro Bistro | 3305 Peachtree Industrial Blvd #100, Duluth, GA 30096, USA | 34.01577 | -84.16489 | steak_house|bar|restaurant|food|point_of_interest|establishment | PRICE_LEVEL_MODERATE | 4.7 | 946 |
ChIJPXds5J-Y9YgReYY4QFDMH7A | Chacko’s Udipi Indian Cuisine | 3300 Peachtree Industrial Blvd J, Duluth, GA 30096, USA | 34.01538 | -84.16375 | indian_restaurant|restaurant|food|point_of_interest|establishment | PRICE_LEVEL_MODERATE | 3.9 | 1279 |
ChIJIY1rg66Z9YgRDh-LrLj1ynQ | Armando’s Taqueria | 4190 Abbotts Bridge Rd, Duluth, GA 30096, USA | 34.01999 | -84.15901 | mexican_restaurant|restaurant|food|point_of_interest|establishment | PRICE_LEVEL_MODERATE | 4.5 | 532 |
ChIJWZBMCpyY9YgRWG4P2dW3asY | Armando’s Caribe | 3170 Peachtree Industrial Blvd, Duluth, GA 30097, USA | 34.01972 | -84.15849 | mexican_restaurant|restaurant|food|point_of_interest|establishment | PRICE_LEVEL_MODERATE | 4.5 | 1278 |
ChIJRdvQZw6i9YgR1ZnOmipYAck | Roma Italiano Ristorante | 3455 Peachtree Industrial Blvd #840, Duluth, GA 30096, USA | 34.00644 | -84.17075 | restaurant|catering_service|italian_restaurant|food_delivery|food|point_of_interest|establishment | PRICE_LEVEL_MODERATE | 4.5 | 782 |
library(dplyr); library(stringr); library(sf); library(tmap)
# Identify POI type per row
# If a row's types contains "korean_restaurant", mark as "korean_restaurant" else "restaurant"
# coalesce() turns a missing 'types' into "" so such rows are labelled
# "restaurant" rather than NA. (`%||%` only substitutes for NULL, and is not
# provided by the packages attached above on R versions before 4.4.)
poi_exp <- poi_clean %>%
  mutate(
    poi_type = if_else(
      str_detect(dplyr::coalesce(types, ""), "\\bkorean_restaurant\\b"),
      "korean_restaurant",
      "restaurant"
    )
  )
# Basic summaries
# Number of POIs per type.
sum_by_type <- count(poi_exp, poi_type, name = "n")
# Mean rating per type, ignoring missing ratings.
avg_rating <- summarize(
  group_by(poi_exp, poi_type),
  avg = mean(rating, na.rm = TRUE),
  .groups = "drop"
)
# Correlation between rating and log(1 + review count) on complete pairs.
cor_rr <- suppressWarnings(
  cor(
    poi_exp$rating,
    log1p(poi_exp$user_ratings_total),
    use = "complete.obs"
  )
)
# Price level vs rating: mean rating and row count per non-missing price level.
price_stats <- poi_exp |>
  filter(!is.na(price_level)) |>
  group_by(price_level) |>
  summarize(
    avg = mean(rating, na.rm = TRUE),
    n = dplyr::n(),
    .groups = "drop"
  )
# Pick “best” POI: highest rating, tie-break by reviews
best_idx <- order(-poi_exp$rating, -poi_exp$user_ratings_total)[1]
if (length(best_idx) && !is.na(best_idx)) {
  best_row <- poi_exp[best_idx, ]
} else {
  best_row <- NULL
}
# Map by type (two colors)
# Point layer built from lon/lat; the lon and lat columns are kept.
poi_exp_sf <- st_as_sf(
  poi_exp,
  coords = c("lon", "lat"),
  crs = 4326,
  remove = FALSE
)
tmap_mode("view")
# City polygon with a faint fill, then one dot per POI colored by poi_type.
tm_shape(city_ll_4326) +
  tm_polygons(alpha = .1, border.col = "black") +
  tm_shape(poi_exp_sf) +
  tm_dots(
    col = "poi_type",
    popup.vars = c("name", "rating", "user_ratings_total", "price_level")
  )
# Simple scatter: rating vs. log(1+reviews)
par(mar = c(4, 4, 1, 1))
x <- log1p(poi_exp$user_ratings_total)
y <- poi_exp$rating
plot(
  x, y,
  pch = 19, cex = 0.7,
  xlab = "log(1 + review count)", ylab = "Rating"
)
# Dashed least-squares line of rating on log(1 + review count).
abline(lm(y ~ x), lty = 2)
# Rating distribution by POI type.
par(mar = c(4, 4, 1, 1))
# grepl() is FALSE/TRUE, so +1L selects the first/second label.
grp <- c("restaurant", "korean_restaurant")[
  grepl("\\bkorean_restaurant\\b", poi_exp$types) + 1L
]
boxplot(poi_exp$rating ~ grp, xlab = "POI type", ylab = "Rating")
# Rating by price level, with the price levels shown in their natural order
# (e.g., PRICE_LEVEL_INEXPENSIVE < MODERATE < EXPENSIVE).
df <- subset(poi_exp, !is.na(rating) & !is.na(price_level))
# Price-level codes -> ordered display labels. PRICE_LEVEL_FREE is included so
# that such rows are not silently mapped to NA and left out of the plot.
lvlmap <- c(
  "PRICE_LEVEL_FREE"           = "0-Free",
  "PRICE_LEVEL_INEXPENSIVE"    = "1-Inexpensive",
  "PRICE_LEVEL_MODERATE"       = "2-Moderate",
  "PRICE_LEVEL_EXPENSIVE"      = "3-Expensive",
  "PRICE_LEVEL_VERY_EXPENSIVE" = "4-Very Expensive"
)
# Look up by character value (a factor would index by its integer codes) and
# drop the lookup names so the factor carries labels only.
df$price_ord <- factor(
  unname(lvlmap[as.character(df$price_level)]),
  levels = unname(lvlmap)
)
df <- droplevels(df) # drop empty levels
par(mar = c(7, 4, 1, 1))
# outline = FALSE: outliers are not drawn by boxplot(); every observation is
# drawn by points() below instead.
boxplot(rating ~ price_ord, data = df, xlab = "", ylab = "Rating", las = 2, outline = FALSE)
# add jittered points
x <- as.numeric(df$price_ord)
points(jitter(x, amount = .12), df$rating, pch = 19, cex = .7)
mtext("Price level", side = 1, line = 5)
# POI density on a regular grid: count POIs per grid cell inside the city and
# map the cells that contain at least one POI.
# prerequisites
stopifnot(exists("city_ll_4326"))
# use poi_exp_sf if you already made it; otherwise build from poi_clean
if (!exists("poi_exp_sf")) {
  stopifnot(exists("poi_clean"))
  poi_exp_sf <- sf::st_as_sf(poi_clean, coords = c("lon", "lat"), crs = 4326, remove = FALSE)
}
# work in meters
# EPSG:3857 (Web Mercator) stretches distances by roughly 1 / cos(latitude), so
# a "500 m" cell there covers only about 415 m on the ground near 34 N.
# UTM zone 16N (EPSG:32616) covers 90W-84W, which includes the POI longitudes
# (about -84.16), and keeps cell sizes close to true meters.
cell_m <- 500
crs_m <- 32616
pts_m <- sf::st_transform(poi_exp_sf, crs_m)
city_m <- sf::st_transform(city_ll_4326, crs_m)
# grid over city + id; cells are clipped to the city polygon
grid_m <- sf::st_make_grid(city_m, cellsize = cell_m, what = "polygons")
grid_sf <- sf::st_as_sf(grid_m) |>
  sf::st_intersection(city_m) |>
  dplyr::mutate(cell_id = dplyr::row_number())
# count points per cell; left = TRUE keeps unmatched points (cell_id = NA),
# which are filtered out before counting
pts_join <- sf::st_join(
  pts_m,
  dplyr::select(grid_sf, cell_id),
  join = sf::st_within,
  left = TRUE
)
counts <- pts_join |>
  sf::st_drop_geometry() |>
  dplyr::filter(!is.na(cell_id)) |>
  dplyr::count(cell_id, name = "n_poi")
# attach counts to the grid; cells without any POI get 0
grid_sf <- grid_sf |>
  dplyr::left_join(counts, by = "cell_id") |>
  dplyr::mutate(n_poi = tidyr::replace_na(n_poi, 0L))
# outputs for mapping (computed once, back in EPSG:4326)
grid_4326 <- sf::st_transform(grid_sf, 4326)
# keep only cells with ≥1 POI
hot_4326 <- dplyr::filter(grid_4326, n_poi > 0)
tmap::tmap_mode("view")
tmap::tm_shape(city_ll_4326) +
  tmap::tm_borders(col = "black", lwd = 1.2) + # outline only (no dark fill)
  tmap::tm_shape(hot_4326) +
  tmap::tm_polygons(
    "n_poi",
    palette = "magma",
    style = "fixed",
    breaks = c(1, 3, 6, 10, Inf),
    labels = c("1–2", "3–5", "6–9", "10+"),
    alpha = 0.9,
    border.col = "white",
    lwd = 0.25,
    title = "POIs per 500 m"
  ) +
  tmap::tm_layout(legend.outside = TRUE, frame = FALSE)
What we can learn from this map is that most restaurants line up along Peachtree Industrial Blvd, Buford Hwy (US-23), and the Duluth Hwy (GA-120) / Main St corridor, as well as near the rail line and the major shopping centers. These corridors concentrate both general restaurants and Korean spots, while the residential edges show few or no POIs. In short: activity clusters on the main commercial spines; outside those spines, the density of these locations thins out.