Get data from the APIs

# Download tract-level ACS 5-year (2021) data for DuPage and Will Counties, IL,
# with geometry, in wide format. Variable B19019_001 is returned under the
# name `hhincome`.
# `progress_bar` is written out in full: the shorter `progress` only worked
# through partial argument matching further down the call chain.
tract <- suppressMessages(
  get_acs(geography = "tract",
          state = "IL",
          county = c("DuPage County", "Will County"),
          variables = c(hhincome = "B19019_001"),
          year = 2021,
          survey = "acs5",
          geometry = TRUE,
          output = "wide",
          progress_bar = FALSE)
)

# Select the city of interest: the Illinois place named "Naperville".
# The download progress bar is switched off so it does not flood the rendered
# document with progress output.
naperville <- tigris::places("IL", progress_bar = FALSE) %>%
  filter(NAME == "Naperville")

# Keep only the tracts that spatially intersect the city polygon
# (`[` on an sf object with an sf index performs a spatial subset).
tract_naperville <- tract[naperville, ]

# create a function
# Build a search circle description for one polygon.
#
# Args:
#   poly: a geometry whose bounding box can be taken with st_bbox().
#   epsg_id: CRS code assigned to the points built from the bounding box.
#
# Returns:
#   A one-row sf object holding the bounding-box centre point, with a `radius`
#   column equal to 1.1 times the distance from the centre to the corner made
#   of the first two bounding-box values.
get_r <- function(poly, epsg_id){
  bbox <- st_bbox(poly)

  # Elements 1-2 form one corner of the box, elements 3-4 the opposite corner.
  near_corner <- bbox[c(1, 2)]
  far_corner <- bbox[c(3, 4)]

  corner_pt <- st_sfc(st_point(near_corner), crs = epsg_id)

  # Midpoint of the two corners, promoted to an sf object so that a column
  # can be attached to it.
  centre <- st_sfc(st_point((near_corner + far_corner) / 2), crs = epsg_id) %>%
    st_sf()

  # Pad the corner-to-centre distance by 10%.
  centre$radius <- st_distance(corner_pt, centre) * 1.1
  centre
}

# run this function on our data
epsg_id <- 4326

# Reproject the tracts and pull out their geometries once, rather than
# repeating the transformation of the whole layer on every loop iteration.
tract_geoms <- tract_naperville %>%
  st_transform(crs = epsg_id) %>%
  st_geometry()

r4all_loop <- vector("list", length(tract_geoms))

# seq_along() produces no iterations when there are no tracts, whereas
# 1:nrow() would count 1, 0 and fail.
for (i in seq_along(tract_geoms)) {
  r4all_loop[[i]] <- get_r(tract_geoms[[i]], epsg_id)
}

# Stack the per-tract results into a single table.
r4all_loop <- bind_rows(r4all_loop)

# Add the centre-point coordinates as plain `x` and `y` columns
ready_4_yelp <- local({
  # Coordinate matrix of the centre points; column 1 becomes x, column 2 y.
  centre_xy <- st_coordinates(r4all_loop)
  mutate(r4all_loop,
         x = centre_xy[, 1],
         y = centre_xy[, 2])
})

# Fetch every business of one category around a single search centre from the
# Yelp business search API.
#
# Args:
#   tract: one row of the search table; its `x`, `y` and `radius` columns are
#     sent as longitude, latitude and (rounded) radius.
#   category: value passed as `categories` to business_search().
#
# Returns:
#   If the API reports fewer than 1000 businesses: all result pages combined
#   with bind_rows(). Otherwise: a list of length ceiling(total / 50) whose
#   first element is the first result page and whose first name is the page
#   count (the remaining pages are not requested).
#
# Raises:
#   An error when the first response carries no `total` field.
get_yelp <- function(tract, category){
  page_size <- 50
  api_key <- Sys.getenv("yelp_api")

  # Single request for a 1-based page number; shared by the first request and
  # the pagination loop so the two cannot drift apart.
  fetch_page <- function(page) {
    business_search(api_key = api_key,
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size,
                    radius = round(tract$radius),
                    limit = page_size)
  }

  # Pause before each search centre.
  Sys.sleep(1)
  resp <- fetch_page(1)

  # Without `total` the page count cannot be computed; fail with a clear
  # message instead of an opaque "invalid 'length' argument" error.
  # NOTE(review): presumably this happens when the API returns an error
  # payload (bad key, rate limit) -- confirm against the yelpr response.
  if (is.null(resp$total)) {
    stop("Yelp business search returned no `total`; ",
         "check the API key, rate limit and request parameters.",
         call. = FALSE)
  }

  required_n <- ceiling(resp$total / page_size)
  out <- vector("list", required_n)
  out[[1]] <- resp$businesses

  # Record the number of pages as the name of the first element.
  names(out)[1] <- required_n

  if (resp$total >= 1000) {
    # The page counter is always 1 here, so report the search location and
    # the reported total rather than a meaningless "1th row".
    print(glue::glue(
      "Search at latitude {tract$y}, longitude {tract$x} reports ",
      "{resp$total} businesses (>= 1000); only the first page is returned."
    ))
    return(out)
  }

  # Remaining pages, if any (empty sequence when there are 0 or 1 pages).
  for (page in seq_len(required_n)[-1]) {
    out[[page]] <- fetch_page(page)$businesses
  }

  bind_rows(out)
}

# 2) Get the first Business data : Kids Activities
yelp_kids_list <- vector("list", nrow(ready_4_yelp))

# One Yelp search per search centre. seq_len() yields no iterations for an
# empty table, whereas 1:nrow() would count 1, 0.
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_kids_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row, ], "kids_activities"))
  print(paste0("Current row: ", row))
}
## [1] "Current row: 1"
## [1] "Current row: 2"
## [1] "Current row: 3"
## [1] "Current row: 4"
## [1] "Current row: 5"
## [1] "Current row: 6"
## [1] "Current row: 7"
## [1] "Current row: 8"
## [1] "Current row: 9"
## [1] "Current row: 10"
## [1] "Current row: 11"
## [1] "Current row: 12"
## [1] "Current row: 13"
## [1] "Current row: 14"
## [1] "Current row: 15"
## [1] "Current row: 16"
## [1] "Current row: 17"
## [1] "Current row: 18"
## [1] "Current row: 19"
## [1] "Current row: 20"
## [1] "Current row: 21"
## [1] "Current row: 22"
## [1] "Current row: 23"
## [1] "Current row: 24"
## [1] "Current row: 25"
## [1] "Current row: 26"
## [1] "Current row: 27"
## [1] "Current row: 28"
## [1] "Current row: 29"
## [1] "Current row: 30"
## [1] "Current row: 31"
## [1] "Current row: 32"
## [1] "Current row: 33"
## [1] "Current row: 34"
## [1] "Current row: 35"
## [1] "Current row: 36"
## [1] "Current row: 37"
## [1] "Current row: 38"
## [1] "Current row: 39"
## [1] "Current row: 40"
## [1] "Current row: 41"
## [1] "Current row: 42"
## [1] "Current row: 43"
## [1] "Current row: 44"
## [1] "Current row: 45"
## [1] "Current row: 46"
## [1] "Current row: 47"
## [1] "Current row: 48"
## [1] "Current row: 49"
## [1] "Current row: 50"
## [1] "Current row: 51"
## [1] "Current row: 52"
# Stack the per-search results into one tibble.
yelp_kids <- as_tibble(bind_rows(yelp_kids_list))

# Second business category: ice cream.
yelp_ice_list <- vector("list", nrow(ready_4_yelp))

# One Yelp search per search centre. seq_len() yields no iterations for an
# empty table, whereas 1:nrow() would count 1, 0.
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_ice_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row, ], "icecream"))
  print(paste0("Current row: ", row))
}
## [1] "Current row: 1"
## [1] "Current row: 2"
## [1] "Current row: 3"
## [1] "Current row: 4"
## [1] "Current row: 5"
## [1] "Current row: 6"
## [1] "Current row: 7"
## [1] "Current row: 8"
## [1] "Current row: 9"
## [1] "Current row: 10"
## [1] "Current row: 11"
## [1] "Current row: 12"
## [1] "Current row: 13"
## [1] "Current row: 14"
## [1] "Current row: 15"
## [1] "Current row: 16"
## [1] "Current row: 17"
## [1] "Current row: 18"
## [1] "Current row: 19"
## [1] "Current row: 20"
## [1] "Current row: 21"
## [1] "Current row: 22"
## [1] "Current row: 23"
## [1] "Current row: 24"
## [1] "Current row: 25"
## [1] "Current row: 26"
## [1] "Current row: 27"
## [1] "Current row: 28"
## [1] "Current row: 29"
## [1] "Current row: 30"
## [1] "Current row: 31"
## [1] "Current row: 32"
## [1] "Current row: 33"
## [1] "Current row: 34"
## [1] "Current row: 35"
## [1] "Current row: 36"
## [1] "Current row: 37"
## [1] "Current row: 38"
## [1] "Current row: 39"
## [1] "Current row: 40"
## [1] "Current row: 41"
## [1] "Current row: 42"
## [1] "Current row: 43"
## [1] "Current row: 44"
## [1] "Current row: 45"
## [1] "Current row: 46"
## [1] "Current row: 47"
## [1] "Current row: 48"
## [1] "Current row: 49"
## [1] "Current row: 50"
## [1] "Current row: 51"
## [1] "Current row: 52"
# Stack the per-search results into one tibble.
yelp_icecream <- as_tibble(bind_rows(yelp_ice_list))

1. Delete duplicated rows

# The search circles of neighbouring tracts overlap, so the same business can
# be returned by several searches. Those copies are not identical rows when a
# field depends on the search centre (Yelp documents `distance` as measured
# from the search location), so whole-row distinct() leaves them in place.
# De-duplicate on the business `id` and keep the first copy of each.
yelp_kids_uni <- yelp_kids %>%
  distinct(id, .keep_all = TRUE)
yelp_ice_uni <- yelp_icecream %>%
  distinct(id, .keep_all = TRUE)

2. Flatten nested columns

# Expand the nested `categories` column; the new column names are prefixed
# with "categories_".
yelp_kids_uni <- tidyr::unnest(yelp_kids_uni, cols = categories, names_sep = "_")

yelp_ice_uni <- tidyr::unnest(yelp_ice_uni, cols = categories, names_sep = "_")

3. Delete rows that have missing values in the coordinates variable

# Drop rows whose `coordinates` value is missing
yelp_kids_uni <- drop_na(yelp_kids_uni, "coordinates")

yelp_ice_uni <- drop_na(yelp_ice_uni, "coordinates")

4. Delete rows that fall outside of the boundary of the city you chose.

# Turn the business tables into point layers (CRS 4326): copy longitude and
# latitude out of the nested `coordinates` column, discard rows where either
# is missing, then build the geometry from those two columns.
yelp_kids_sf <- yelp_kids_uni %>%
  mutate(x = coordinates$longitude,
         y = coordinates$latitude) %>%
  filter(!(is.na(x) | is.na(y))) %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)

yelp_ice_sf <- yelp_ice_uni %>%
  mutate(x = coordinates$longitude,
         y = coordinates$latitude) %>%
  filter(!(is.na(x) | is.na(y))) %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)

# Search circles: each centre point buffered by its own `radius`.
# NOTE(review): the filters below keep points that lie inside these search
# circles, not inside the `naperville` city polygon -- confirm that this is
# the intended meaning of "inside the city boundary".
buffer_area <- ready_4_yelp %>%
  st_buffer(dist = .$radius)

# Keep the points that fall within at least one search circle
# 1) Kids Activities
in_yelp_kids_sf <- yelp_kids_sf %>%
  filter(lengths(st_within(., buffer_area)) > 0)

# 2) Ice cream
in_yelp_ice_sf <- yelp_ice_sf %>%
  filter(lengths(st_within(., buffer_area)) > 0)

Visualize the number of ratings

1) Ice cream

# Number of ice cream businesses per rating band.
# - Each business is counted once: after `categories` is unnested a business
#   has one row per category, so rows are de-duplicated on the Yelp `id`.
# - Ratings are banded before counting, so every band is a single row and
#   `pct` is the share of that band.
icecream_r <- in_yelp_ice_sf %>%
  filter(!duplicated(id)) %>%
  mutate(rating = case_when(
    rating >= 0 & rating < 1 ~ "0-1",
    rating >= 1 & rating < 2 ~ "1-2",
    rating >= 2 & rating < 3 ~ "2-3",
    rating >= 3 & rating < 4 ~ "3-4",
    rating >= 4 & rating <= 5 ~ "4-5"
  )) %>%
  group_by(rating) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  mutate(pct = n / sum(n) * 100)

# The x axis shows the rating band and the y axis the number of businesses;
# review counts are not part of this chart.
ggplot(icecream_r, aes(x = rating, y = n, fill = rating)) +
  geom_bar(stat = "identity") +
  labs(title = "Number of Ice Cream Businesses by Rating",
       x = "Rating",
       y = "Number of businesses") +
  theme_minimal()

Visualize the number of ratings

2) Kids Activities

# Number of kids-activities businesses per rating band.
# - Each business is counted once: after `categories` is unnested a business
#   has one row per category, so rows are de-duplicated on the Yelp `id`.
# - Ratings are banded before counting, so every band is a single row and
#   `pct` is the share of that band.
kids_r <- in_yelp_kids_sf %>%
  filter(!duplicated(id)) %>%
  mutate(rating = case_when(
    rating >= 0 & rating < 1 ~ "0-1",
    rating >= 1 & rating < 2 ~ "1-2",
    rating >= 2 & rating < 3 ~ "2-3",
    rating >= 3 & rating < 4 ~ "3-4",
    rating >= 4 & rating <= 5 ~ "4-5"
  )) %>%
  group_by(rating) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  mutate(pct = n / sum(n) * 100)

# The x axis shows the rating band and the y axis the number of businesses;
# review counts are not part of this chart.
ggplot(kids_r, aes(x = rating, y = n, fill = rating)) +
  geom_bar(stat = "identity") +
  labs(title = "Number of Kids Activities Businesses by Rating",
       x = "Rating",
       y = "Number of businesses") +
  theme_minimal()

Tell a short story about finding(s) that interests you

I cleaned both data tables and found that neither contained duplicate rows or rows with missing coordinates. However, some data points fell outside the buffer area. After flattening the `categories` column, I found that the ice cream table had 21 category titles, while the kids activities table had 26. I initially wanted to explore the relationship between review count and rating, so I visualized the data with a scatter plot, but no noticeable correlation emerged. I therefore grouped the ratings into five bands and visualized the number of ratings in each band. The distribution differs between the two tables: for ice cream, the most frequent band is 3 to 4, followed by 4 to 5. For kids activities, in contrast, the most frequent band is 4 to 5, followed by 0 to 1.