Tidying our POI data

# 1) Import data -----------------------------------------------------------
# Study area label (reused in log messages) and the POI types of interest
city_name      <- "Duluth, GA"
included_types <- c("restaurant", "korean_restaurant")

# Saved POI table; stop immediately when the file is not where we expect it
raw_path <- "duluth_places_restaurant_korean_restaurant_ALL.rds"
stopifnot(file.exists(raw_path))
poi_raw2 <- readRDS(file = raw_path)


# 2) Tidy data -------------------------------------------------------------

library(tidyr)
library(kableExtra)

# 2a. De-duplicate on the Google Place 'id' (all other columns are kept)
n0 <- nrow(poi_raw2)
poi_step1 <- poi_raw2 |>
  dplyr::distinct(id, .keep_all = TRUE)
cat("(removed", n0 - nrow(poi_step1), ")\n")
## (removed 0 )
# 2b. Flatten/unnest list-columns and collapse 'types' to a single string
# Flag every list-column and remember its name
is_listcol <- vapply(poi_step1, is.list, logical(1))
list_cols  <- names(poi_step1)[is_listcol]

# Collapse one POI's type labels into a single "|"-separated string.
#
# x: a vector (or list) of type labels for a single POI; may be NULL or empty.
# Returns a length-1 character vector with the unique labels in first-seen
# order, or NA_character_ when no usable label remains.
collapse_types <- function(x) {
  labels <- unique(as.character(x))
  # Drop missing labels so they are not pasted in as the literal text "NA"
  labels <- labels[!is.na(labels)]
  if (length(labels) == 0L) return(NA_character_)
  paste(labels, collapse = "|")
}

# Turn the 'types' list-column into one string per row; when the column is
# absent the table passes through unchanged
poi_step2 <- if ("types" %in% names(poi_step1)) {
  dplyr::mutate(poi_step1, types = purrr::map_chr(types, collapse_types))
} else {
  poi_step1
}

#2c. Handle missing values
key_cols <- c("id","name","formatted_address","lat","lon")

# Number of rows with an NA in any key column that exists in the table
count_incomplete <- function(tbl) {
  present <- intersect(key_cols, names(tbl))
  sum(!stats::complete.cases(tbl[, present, drop = FALSE]))
}

missing_before <- count_incomplete(poi_step2)

# Keep rows with an id, a non-empty name and address, and both coordinates
poi_step3 <- dplyr::filter(
  poi_step2,
  !is.na(id),
  !is.na(name), nzchar(name),
  !is.na(formatted_address), nzchar(formatted_address),
  !is.na(lat),
  !is.na(lon)
)

missing_after <- count_incomplete(poi_step3)
cat("[NA handling] Rows after drop NAs is", nrow(poi_step3), "\n")
## [NA handling] Rows after drop NAs is 148
# 2d. Filter by location: keep only POIs inside the city boundary
# Fetch the Duluth place polygon only when it is not already in the session.
if (!exists("city_ll_4326")) {
  # progress_bar = FALSE keeps the download progress bar out of the rendered
  # report, where it would otherwise be echoed as a long block of output.
  ga_places <- tigris::places(state = "GA", year = 2023, progress_bar = FALSE)
  city_ll   <- subset(ga_places, NAME == "Duluth")
  stopifnot(nrow(city_ll) == 1)  # exactly one place must match
  city_ll_4326 <- sf::st_transform(city_ll, 4326)
}
## (tigris download progress bar output, 0% to 100%, omitted)
# Build point geometries from lon/lat (EPSG:4326); remove = FALSE keeps the
# coordinate columns in the table
poi_step3_sf <- poi_step3 |>
  sf::st_as_sf(coords = c("lon", "lat"), crs = 4326, remove = FALSE)

# A point is kept when it falls within at least one boundary polygon
inside <- sf::st_within(poi_step3_sf, city_ll_4326)
keep   <- lengths(inside) != 0L

n_before_city <- nrow(poi_step3_sf)
poi_clean_sf  <- poi_step3_sf[which(keep), ]
cat("[City filter] Kept", nrow(poi_clean_sf), "of", n_before_city, "rows within", city_name, "\n")
## [City filter] Kept 142 of 148 rows within Duluth, GA
# Final tidy table: plain attribute table (no geometry) with the most useful
# columns first and every remaining column after them
poi_attrs <- sf::st_drop_geometry(poi_clean_sf)
poi_clean <- dplyr::select(
  poi_attrs,
  id, name, formatted_address,
  lat, lon,
  types, price_level, rating, user_ratings_total,
  dplyr::everything()
)

# 3) Show the cleaned POI data --------------------------------------------------
# Print the first 10 rows as a styled table. If styling fails (for example
# kableExtra is unavailable), warn and fall back to a plain print instead of
# silently showing nothing.
tryCatch(
  poi_clean |>
    head(10) |>
    kableExtra::kable() |>
    kableExtra::kable_styling(full_width = FALSE),
  error = function(e) {
    warning("Could not render the styled table: ", conditionMessage(e),
            call. = FALSE)
    head(poi_clean, 10)
  }
)
id name formatted_address lat lon types price_level rating user_ratings_total
ChIJAQvhCUCj9YgR99qEjEINX10 Mazaj Atlanta 3312 Peachtree Industrial Blvd #1, Duluth, GA 30096, USA 34.01455 -84.16455 middle_eastern_restaurant&#124;restaurant&#124;food&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE 4.7 873
ChIJB9OJaz2Z9YgR4jtaD9WHVHw QuikTrip 3195 Peachtree Industrial Blvd, Duluth, GA 30097, USA 34.02114 -84.16085 gas_station&#124;convenience_store&#124;meal_takeaway&#124;breakfast_restaurant&#124;liquor_store&#124;food_store&#124;restaurant&#124;food&#124;store&#124;point_of_interest&#124;establishment PRICE_LEVEL_INEXPENSIVE 4.1 94
ChIJQdg00piY9YgR5Q5tfiKUfT4 Gokul Sweets 4315 Abbotts Bridge Rd #3&4, Duluth, GA 30097, USA 34.02076 -84.16248 indian_restaurant&#124;vegan_restaurant&#124;vegetarian_restaurant&#124;dessert_restaurant&#124;dessert_shop&#124;confectionery&#124;asian_grocery_store&#124;grocery_store&#124;food_store&#124;restaurant&#124;food&#124;store&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE 3.7 1463
ChIJ00A6K8qj9YgRj9z38ovoDpY Kang’s Table (강스테이블) Duluth 3312 Peachtree Industrial Blvd Suite-5, Duluth, GA 30096, USA 34.01422 -84.16489 restaurant&#124;korean_restaurant&#124;food&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE .2 133
ChIJE5Ar3P2Z9YgRB7-GQYg6aLQ THE BAKE 3294 Peachtree Industrial Blvd #1000, Duluth, GA 30096, USA 34.01643 -84.16289 brunch_restaurant&#124;coffee_shop&#124;bakery&#124;cafe&#124;food_store&#124;restaurant&#124;food&#124;store&#124;point_of_interest&#124;establishment NA 4.5 264
ChIJizPHG6CY9YgRLaAzAfFbE2Q Kurt’s Euro Bistro 3305 Peachtree Industrial Blvd #100, Duluth, GA 30096, USA 34.01577 -84.16489 steak_house&#124;bar&#124;restaurant&#124;food&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE 4.7 946
ChIJPXds5J-Y9YgReYY4QFDMH7A Chacko’s Udipi Indian Cuisine 3300 Peachtree Industrial Blvd J, Duluth, GA 30096, USA 34.01538 -84.16375 indian_restaurant&#124;restaurant&#124;food&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE 3.9 1279
ChIJIY1rg66Z9YgRDh-LrLj1ynQ Armando’s Taqueria 4190 Abbotts Bridge Rd, Duluth, GA 30096, USA 34.01999 -84.15901 mexican_restaurant&#124;restaurant&#124;food&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE 4.5 532
ChIJWZBMCpyY9YgRWG4P2dW3asY Armando’s Caribe 3170 Peachtree Industrial Blvd, Duluth, GA 30097, USA 34.01972 -84.15849 mexican_restaurant&#124;restaurant&#124;food&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE 4.5 1278
ChIJRdvQZw6i9YgR1ZnOmipYAck Roma Italiano Ristorante 3455 Peachtree Industrial Blvd #840, Duluth, GA 30096, USA 34.00644 -84.17075 restaurant&#124;catering_service&#124;italian_restaurant&#124;food_delivery&#124;food&#124;point_of_interest&#124;establishment PRICE_LEVEL_MODERATE 4.5 782

Explore & Report Findings

library(dplyr); library(stringr); library(sf); library(tmap)

# Identify POI type per row
# A row whose 'types' string contains "korean_restaurant" is labelled
# "korean_restaurant"; every other row, including rows with missing 'types',
# is labelled "restaurant".
# coalesce() replaces NA element-wise, so a missing 'types' value cannot turn
# into an NA poi_type.
poi_exp <- poi_clean |>
  dplyr::mutate(
    poi_type = dplyr::if_else(
      stringr::str_detect(dplyr::coalesce(types, ""), "\\bkorean_restaurant\\b"),
      "korean_restaurant",
      "restaurant"
    )
  )

# Basic summaries: POI count and mean rating per type, plus the correlation
# between rating and log(1 + review count) over complete pairs
sum_by_type <- count(poi_exp, poi_type, name = "n")

avg_rating <- poi_exp |>
  group_by(poi_type) |>
  summarize(avg = mean(rating, na.rm = TRUE), .groups = "drop")

log_reviews <- log1p(poi_exp$user_ratings_total)
cor_rr      <- suppressWarnings(cor(poi_exp$rating, log_reviews, use = "complete.obs"))

# Price level vs rating: mean rating and row count per known price level
priced_rows <- filter(poi_exp, !is.na(price_level))
price_stats <- priced_rows |>
  group_by(price_level) |>
  summarize(avg = mean(rating, na.rm = TRUE), n = dplyr::n(), .groups = "drop")

# Pick “best” POI: rank by rating (descending), breaking ties by review count
# (descending), and take the top row; NULL when there is nothing to pick
rank_order <- order(-poi_exp$rating, -poi_exp$user_ratings_total)
best_idx   <- rank_order[1]
best_row   <- NULL
if (length(best_idx) && !is.na(best_idx)) {
  best_row <- poi_exp[best_idx, ]
}

Now let’s look at our map, whose popups show each POI’s name, rating, total number of user ratings, and price level.

# Map by type (two colors)
# Rebuild point geometries from lon/lat so poi_type can drive the dot color
poi_exp_sf <- poi_exp |>
  st_as_sf(coords = c("lon", "lat"), crs = 4326, remove = FALSE)

tmap_mode("view")

# City polygon underneath, POI dots on top; popups list the chosen columns
boundary_layer <- tm_shape(city_ll_4326) +
  tm_polygons(alpha = .1, border.col = "black")
poi_layer <- tm_shape(poi_exp_sf) +
  tm_dots(
    col = "poi_type",
    popup.vars = c("name", "rating", "user_ratings_total", "price_level")
  )
boundary_layer + poi_layer
# Simple scatter: rating vs. log(1+reviews), with a dashed least-squares line
par(mar = c(4, 4, 1, 1))
y <- poi_exp$rating
x <- log1p(poi_exp$user_ratings_total)
plot(
  x, y,
  pch = 19, cex = 0.7,
  xlab = "log(1 + review count)", ylab = "Rating"
)
abline(reg = lm(y ~ x), lty = 2)

Observation 1: Rating vs. log(1 + review count) (scatter with trend)

  • The dashed regression line slopes upward, which means more reviews ↔︎ slightly higher ratings on average.
  • Low-review POIs show wide dispersion (some 5.0s, some <3.0) → greater uncertainty about true quality.
  • A few low-rated points with many reviews indicate consistently underperforming venues.
# Rating distribution per POI type; rows whose 'types' mention
# korean_restaurant form one group, all other rows the second group
par(mar = c(4, 4, 1, 1))
is_korean <- grepl("\\bkorean_restaurant\\b", poi_exp$types)
grp <- c("restaurant", "korean_restaurant")[is_korean + 1L]
boxplot(poi_exp$rating ~ grp, xlab = "POI type", ylab = "Rating")

Observation 2: What the boxplot shows

  • Medians: Korean restaurants sit a bit higher (≈4.4–4.5) than the general restaurant group (≈4.1–4.3).
  • Spread: Korean restaurants have a tighter IQR, meaning ratings are more consistently high; the general group is wider.
  • Outliers: General restaurants include several low-rated outliers (one ≈1), which drag the distribution down. Korean has fewer/lighter low outliers.
  • Takeaway: The Korean subset appears slightly higher-rated and more consistent, but remember sample sizes may differ.
# Order price levels if present (e.g., PRICE_LEVEL_INEXPENSIVE < MODERATE < EXPENSIVE)
# Only rows with both a rating and a price level are plotted
has_both <- !is.na(poi_exp$rating) & !is.na(poi_exp$price_level)
df <- poi_exp[has_both, ]

# Raw price codes -> display labels whose numeric prefix fixes the axis order
lvlmap <- c(
  "PRICE_LEVEL_INEXPENSIVE"    = "1-Inexpensive",
  "PRICE_LEVEL_MODERATE"       = "2-Moderate",
  "PRICE_LEVEL_EXPENSIVE"      = "3-Expensive",
  "PRICE_LEVEL_VERY_EXPENSIVE" = "4-Very Expensive"
)
df$price_ord <- factor(lvlmap[df$price_level], levels = unname(lvlmap))
df <- droplevels(df)  # unused levels would otherwise leave empty boxes

par(mar = c(7, 4, 1, 1))
# outline = FALSE: outliers are not drawn by the boxplot itself; every
# observation is overlaid as a jittered point instead
boxplot(rating ~ price_ord, data = df, xlab = "", ylab = "Rating", las = 2, outline = FALSE)
x <- as.numeric(df$price_ord)
points(jitter(x, amount = .12), df$rating, pch = 19, cex = .7)
mtext("Price level", side = 1, line = 5)

Observation 3: Price level vs. rating

  • Medians: “2-Moderate” skews slightly higher (~4.3–4.5) than “1-Inexpensive” (~3.9–4.2).
  • Spread: “1-Inexpensive” has a wider IQR and many low outliers (down to ~2.6–3.0), suggesting more variable quality.
  • Sparse tier: “3-Expensive” has only one observation, so we can’t generalize from it.
  • Takeaway: Mid-priced places tend to rate a bit higher and more consistently than inexpensive ones.
# prerequisites
stopifnot(exists("city_ll_4326"))
# use poi_exp_sf if you already made it; otherwise build from poi_clean
if (!exists("poi_exp_sf")) {
  stopifnot(exists("poi_clean"))
  poi_exp_sf <- sf::st_as_sf(poi_clean, coords = c("lon","lat"), crs = 4326, remove = FALSE)
}

# Work in meters. Web Mercator (EPSG:3857) units are stretched by about
# 1 / cos(latitude), so near 34 N a 500-unit cell spans only about 415 m on
# the ground. UTM zone 16N (EPSG:32616) covers Duluth, GA and keeps map units
# very close to true meters, so cell_m is an actual ground distance.
cell_m <- 500
crs_m  <- 32616
pts_m  <- sf::st_transform(poi_exp_sf, crs_m)
city_m <- sf::st_transform(city_ll_4326, crs_m)

# Square grid over the city's bounding box, clipped to the city polygon;
# each clipped cell gets a sequential id
grid_m  <- sf::st_make_grid(city_m, cellsize = cell_m, what = "polygons")
grid_sf <- sf::st_as_sf(grid_m) |>
  sf::st_intersection(city_m) |>
  dplyr::mutate(cell_id = dplyr::row_number())

# Count points per cell: tag each point with the cell it lies within, then
# tally by cell_id (points outside every cell get NA and are dropped)
pts_join <- sf::st_join(pts_m, dplyr::select(grid_sf, cell_id), join = sf::st_within, left = TRUE)
counts <- pts_join |>
  sf::st_drop_geometry() |>
  dplyr::filter(!is.na(cell_id)) |>
  dplyr::count(cell_id, name = "n_poi")

# Attach counts to the grid; cells with no matching point get 0
grid_sf <- grid_sf |>
  dplyr::left_join(counts, by = "cell_id") |>
  dplyr::mutate(n_poi = tidyr::replace_na(n_poi, 0L))

# outputs for mapping (back in lon/lat); hot_4326 keeps only non-empty cells
grid_4326 <- sf::st_transform(grid_sf, 4326)
hot_4326  <- dplyr::filter(grid_4326, n_poi > 0)

# Requires grid_sf (projected grid with an n_poi column) and city_ll_4326
# Reproject the grid to lon/lat and keep only cells holding at least one POI
grid_4326 <- grid_sf |>
  sf::st_transform(4326)
hot_4326 <- grid_4326 |>
  dplyr::filter(n_poi > 0)

tmap::tmap_mode("view")

# City outline only (no fill), so the colored cells stay readable
city_outline <- tmap::tm_shape(city_ll_4326) +
  tmap::tm_borders(col = "black", lwd = 1.2)

# Non-empty cells, colored by POI count in four fixed classes
density_cells <- tmap::tm_shape(hot_4326) +
  tmap::tm_polygons(
    "n_poi",
    title      = "POIs per 500 m",
    style      = "fixed",
    breaks     = c(1, 3, 6, 10, Inf),
    labels     = c("1–2", "3–5", "6–9", "10+"),
    palette    = "magma",
    alpha      = 0.9,
    border.col = "white",
    lwd        = 0.25
  )

city_outline +
  density_cells +
  tmap::tm_layout(legend.outside = TRUE, frame = FALSE)

Observation 4: Spatial Density Map

What we can learn from this map is that most restaurants line up along Peachtree Industrial Blvd, Buford Hwy (US-23), and the Duluth Hwy (GA-120) / Main St corridor, as well as along or near the rail line and the major shopping centers. These corridors concentrate both general restaurants and Korean spots, while the residential edges show few or no POIs. In short: activity clusters on the main commercial spines; outside those spines, locations thin out.