1. Importing Libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(here)
## here() starts at C:/Users/chan303/OneDrive - Georgia Institute of Technology/CP8883BK/UA_module2
library(readr)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
library(tidycensus)
library(skimr)

2. Preparing Yelp Data

# Load the saved Yelp data from the UA_module1 folder, one level above the
# here() project root.
my_yelp <- here("..", "UA_module1", "Data", "yelp_all.rds") %>%
  read_rds()

3. Tidying Yelp Data

Issue 1: Duplicate Data

# Keep one row per Yelp business id. `.keep_all = TRUE` retains every other
# column, taken from the first occurrence of each id.
yelp_unique <- my_yelp %>%
  distinct(id, .keep_all = TRUE)

# Report how many rows the de-duplication removed. This step drops duplicate
# ids (not NAs), so the message is worded accordingly.
glue::glue(
  "Before dropping duplicates, there were {nrow(my_yelp)} rows. ",
  "After dropping them, there are {nrow(yelp_unique)} rows"
) %>%
  print()
## Before dropping NA, there were 3286 rows. After dropping them, there are 466 rows
# why are there so many duplicate rows in the first place?

Issue 2: Flattening Columns

# Collapse the categories of one Yelp business into a single string.
#
# x: one element of the "categories" column of the Yelp data -- a data frame
#    with columns "alias" and "title".
# Returns a character vector holding the titles joined by ", ".
concate_list <- function(x) {
  str_c(x[["title"]], collapse = ", ")
}

yelp_flat <- yelp_unique %>%
  # Step 1: spread nested data-frame columns into ordinary top-level columns
  jsonlite::flatten() %>%
  # Step 2: turn each list-column entry into one comma-separated string
  mutate(
    transactions = map_chr(transactions, str_c, collapse = ", "),
    location.display_address = map_chr(
      location.display_address, str_c,
      collapse = ", "
    ),
    categories = map_chr(categories, concate_list)
  )

Issue 3: Missing Values

# Count the NA entries in every column of the flattened data
yelp_flat %>%
  map_dbl(~ sum(is.na(.x)))
##                       id                    alias                     name 
##                        0                        0                        0 
##                image_url                is_closed                      url 
##                        0                        0                        0 
##             review_count               categories                   rating 
##                        0                        0                        0 
##             transactions                    price                    phone 
##                        0                      209                        0 
##            display_phone                 distance     coordinates.latitude 
##                        0                        0                        0 
##    coordinates.longitude        location.address1        location.address2 
##                        0                        4                      100 
##        location.address3            location.city        location.zip_code 
##                      117                        0                        0 
##         location.country           location.state location.display_address 
##                        0                        0                        0
# Check that NAs in the latitude and longitude columns fall on the same rows.
# NOTE(review): the NA summary printed above reports 0 missing values in both
# coordinate columns (the 4 missing values are in location.address1), so this
# comparison is TRUE because neither column contains any NA.
identical(is.na(yelp_flat$coordinates.latitude),
          is.na(yelp_flat$coordinates.longitude)) # TRUE when the NA positions match.
## [1] TRUE
# Drop businesses that cannot be placed on a map. Both coordinate columns are
# listed so a row missing either value is removed, rather than relying on the
# two columns always being NA together.
yelp_dropna1 <- yelp_flat %>%
  drop_na(coordinates.longitude, coordinates.latitude)

# Drop businesses without a price level, since price is the mapped variable
yelp_dropna2 <- yelp_dropna1 %>%
  drop_na(price)

4. Visualization (1) Get boundary from census

#### Tract polygons for the Yelp query
# Download tract-level median household income (ACS table B19019) for
# Champaign County, IL, together with the tract geometries.
# suppressMessages() hides tidycensus status messages but not the geometry
# download progress bar, so the bar is switched off explicitly to keep the
# rendered report free of progress output.
champaign_census <- suppressMessages(
  get_acs(
    geography = "tract", # or "block group", "county", "state" etc.
    state = "IL",
    county = "Champaign",
    variables = c(hhincome = "B19019_001"),
    year = 2019,
    survey = "acs5", # American Community Survey 5-year estimate
    geometry = TRUE, # returns sf objects
    output = "wide", # wide vs. long
    progress_bar = FALSE # passed through `...` to the tigris download
  )
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%

I found that the census data CRS is NAD83, while the Yelp data CRS is WGS84. I will transform the census data to WGS84 to avoid an error when running ‘st_intersects’.

# head(champaign_census)
# Reproject the tract polygons to WGS84 so they share a CRS with the points
champaign_census <- champaign_census %>%
  st_transform("WGS84")
# head(champaign_census)

# Build point geometries from the Yelp longitude/latitude columns (EPSG:4326)
yelp_sf <- st_as_sf(
  yelp_dropna2,
  coords = c("coordinates.longitude", "coordinates.latitude"),
  crs = 4326
)

# Keep only the businesses that intersect the union of all tract polygons
yelp_in <- yelp_sf[st_union(champaign_census), , op = st_intersects]

I found that 6 businesses were outside the boundary.

# Compare the row and column counts of the raw data with the final subset
print(
  glue::glue("nrow before: {nrow(my_yelp)} -> nrow after: {nrow(yelp_in)} \n
            ncol before: {ncol(my_yelp)} -> ncol after: {ncol(yelp_in)} \n")
)
## nrow before: 3286 -> nrow after: 251 
## 
## ncol before: 16 -> ncol after: 23

Figure 1: Price distribution of shops and hotels in Champaign County

# Switch tmap to interactive viewing for the maps that follow
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Plot each business as a dot coloured by its Yelp price level
tm_shape(yelp_in) +
  tm_dots(col = "price")

5. Appending Census Data

# Make sure the tract layer is an sf object before joining.
# NOTE(review): get_acs(geometry = TRUE) is annotated earlier as returning sf
# objects, so st_sf() is probably a no-op here -- confirm.
census_sf <- st_sf(champaign_census)

# Spatial joins in both directions:
#   census_yelp -- tract polygons with the attributes of intersecting businesses
#   yelp_census -- business points with the attributes of the tract they fall in
census_yelp <- census_sf %>%
  st_join(yelp_in, join = st_intersects)
yelp_census <- yelp_in %>%
  st_join(census_sf, join = st_intersects)

# Preview the polygon-based join
head(census_yelp)
## Simple feature collection with 6 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -88.3143 ymin: 40.0622 xmax: -88.12398 ymax: 40.13507
## Geodetic CRS:  WGS 84
##           GEOID                                           NAME hhincomeE
## 1   17019005800    Census Tract 58, Champaign County, Illinois     49855
## 2   17019005900    Census Tract 59, Champaign County, Illinois     10988
## 3   17019001206 Census Tract 12.06, Champaign County, Illinois    108385
## 4   17019005600    Census Tract 56, Champaign County, Illinois     60774
## 4.1 17019005600    Census Tract 56, Champaign County, Illinois     60774
## 5   17019000700     Census Tract 7, Champaign County, Illinois     40074
##     hhincomeM                     id                           alias
## 1        6225 z-jVqx3Wx9fKOK6fn86Csg               klose-knit-urbana
## 2        2328 5lXhJe7gob74m93eyjyp3A     illini-union-hotel-urbana-2
## 3        3924                   <NA>                            <NA>
## 4       11208 mqW1uq15v8iABsh9BzDZjA             bakers-bikes-urbana
## 4.1     11208 Q9apdosOkT4rnX3YRurbmA country-arbors-nursery-urbana-3
## 5        5883 0HVZte6_aF1yDNeAde9u9Q  orphans-treasure-box-champaign
##                       name
## 1               Klose Knit
## 2       Illini Union Hotel
## 3                     <NA>
## 4            Baker's Bikes
## 4.1 Country Arbors Nursery
## 5     Orphans Treasure Box
##                                                                image_url
## 1   https://s3-media4.fl.yelpcdn.com/bphoto/weHG-Pp-SSTwdlOid20SwQ/o.jpg
## 2   https://s3-media3.fl.yelpcdn.com/bphoto/ly2xBb_vyNq3FLiEYaV3Pw/o.jpg
## 3                                                                   <NA>
## 4   https://s3-media4.fl.yelpcdn.com/bphoto/yvbHZ0fmRfvuFRzxwuTCUw/o.jpg
## 4.1 https://s3-media2.fl.yelpcdn.com/bphoto/E9XXy-fjkzL9FZz-s4Yy7Q/o.jpg
## 5   https://s3-media3.fl.yelpcdn.com/bphoto/1A-x7_-9PMGgtJ1QtmTj8A/o.jpg
##     is_closed
## 1       FALSE
## 2       FALSE
## 3          NA
## 4       FALSE
## 4.1     FALSE
## 5       FALSE
##                                                                                                                                                                                              url
## 1                 https://www.yelp.com/biz/klose-knit-urbana?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 2       https://www.yelp.com/biz/illini-union-hotel-urbana-2?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 3                                                                                                                                                                                           <NA>
## 4               https://www.yelp.com/biz/bakers-bikes-urbana?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 4.1 https://www.yelp.com/biz/country-arbors-nursery-urbana-3?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 5    https://www.yelp.com/biz/orphans-treasure-box-champaign?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
##     review_count                                   categories rating
## 1             12                            Knitting Supplies    4.5
## 2             16                                       Hotels    2.5
## 3             NA                                         <NA>     NA
## 4             33               Bikes, Bike Repair/Maintenance    4.5
## 4.1           10                        Nurseries & Gardening    4.5
## 5              4 Community Service/Non-Profit, Used Bookstore    5.0
##     transactions price        phone  display_phone  distance
## 1                 $$$$ +12173442123 (217) 344-2123  744.0377
## 2                   $$ +12173331241 (217) 333-1241  553.3484
## 3           <NA>  <NA>         <NA>           <NA>        NA
## 4                    $ +12173650318 (217) 365-0318 1125.3185
## 4.1                 $$ +12173671072 (217) 367-1072 2439.1093
## 5                    $ +12172983202 (217) 298-3202  972.7355
##         location.address1 location.address2 location.address3 location.city
## 1   311 W Springfield Ave                                            Urbana
## 2         1401 W Green St                                            Urbana
## 3                    <NA>              <NA>              <NA>          <NA>
## 4          1003 S Lynn St                                            Urbana
## 4.1 1742 County Rd 1400 N                                            Urbana
## 5          826 Pioneer St              <NA>                       Champaign
##     location.zip_code location.country location.state
## 1               61801               US             IL
## 2               61801               US             IL
## 3                <NA>             <NA>           <NA>
## 4               61801               US             IL
## 4.1             61802               US             IL
## 5               61820               US             IL
##                    location.display_address                       geometry
## 1   311 W Springfield Ave, Urbana, IL 61801 MULTIPOLYGON (((-88.21775 4...
## 2         1401 W Green St, Urbana, IL 61801 MULTIPOLYGON (((-88.22882 4...
## 3                                      <NA> MULTIPOLYGON (((-88.3143 40...
## 4          1003 S Lynn St, Urbana, IL 61801 MULTIPOLYGON (((-88.20497 4...
## 4.1 1742 County Rd 1400 N, Urbana, IL 61802 MULTIPOLYGON (((-88.20497 4...
## 5       826 Pioneer St, Champaign, IL 61820 MULTIPOLYGON (((-88.25797 4...

Figure 2: Ratings distribution of shops and hotels in Champaign County

# Map the mean Yelp rating of each census tract using quantile class breaks
tm_shape(
  census_yelp %>% group_by(GEOID) %>% summarise(rating = mean(rating))
) +
  tm_polygons(col = "rating", style = "quantile")

# Preview the point-based join
head(yelp_census)
## Simple feature collection with 6 features and 26 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -88.2366 ymin: 40.10431 xmax: -88.19821 ymax: 40.11276
## Geodetic CRS:  WGS 84
##                       id                          alias                    name
## 1 KuJEyXesWoyOm3GQm0rqcg          the-idea-store-urbana          The Idea Store
## 2 mqW1uq15v8iABsh9BzDZjA            bakers-bikes-urbana           Baker's Bikes
## 3 e0tj2Jip560QbC8N9pF6xw              fyxit-champaign-2                   FYXIT
## 4 ckdGk8ForF9zEczmu4-tTA     strawberry-fields-urbana-2       Strawberry Fields
## 5 POuzQLJuPWx0i-dUNdRsWQ international-galleries-urbana International Galleries
## 6 7Vaj54SeGM3RupuvNlccOw             heel-to-toe-urbana             Heel To Toe
##                                                              image_url
## 1 https://s3-media2.fl.yelpcdn.com/bphoto/Rc6DmJoBp9zdjrsdURIKPg/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/yvbHZ0fmRfvuFRzxwuTCUw/o.jpg
## 3 https://s3-media1.fl.yelpcdn.com/bphoto/3u2wpWz6vU4geIOI9K61VA/o.jpg
## 4 https://s3-media2.fl.yelpcdn.com/bphoto/zO4Ie2IkCY9Asifnboy82w/o.jpg
## 5                                                                     
## 6 https://s3-media2.fl.yelpcdn.com/bphoto/sVy8H_AQ0Zy4jX7Dhq5CJA/o.jpg
##   is_closed
## 1     FALSE
## 2     FALSE
## 3     FALSE
## 4     FALSE
## 5     FALSE
## 6     FALSE
##                                                                                                                                                                                           url
## 1          https://www.yelp.com/biz/the-idea-store-urbana?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 2            https://www.yelp.com/biz/bakers-bikes-urbana?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 3              https://www.yelp.com/biz/fyxit-champaign-2?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 4     https://www.yelp.com/biz/strawberry-fields-urbana-2?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 5 https://www.yelp.com/biz/international-galleries-urbana?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
## 6             https://www.yelp.com/biz/heel-to-toe-urbana?adjust_creative=-To_AbVIHKh8mMAunCeEbg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=-To_AbVIHKh8mMAunCeEbg
##   review_count                                         categories rating
## 1           22                        Art Supplies, Thrift Stores    4.5
## 2           33                     Bikes, Bike Repair/Maintenance    4.5
## 3           36 Electronics Repair, Mobile Phone Repair, Computers    4.5
## 4           30   Organic Stores, Bakeries, Vitamins & Supplements    3.5
## 5            9                    Jewelry, Art Galleries, Framing    4.5
## 6           32                Shoe Stores, Shoe Repair, Orthotics    3.5
##       transactions price        phone  display_phone  distance
## 1                      $ +12173527878 (217) 352-7878  612.9849
## 2                      $ +12173650318 (217) 365-0318 1125.3185
## 3                     $$ +12176974171 (217) 697-4171 2216.2918
## 4 delivery, pickup    $$ +12173281655 (217) 328-1655  783.2241
## 5                     $$ +12173282254 (217) 328-2254  725.2433
## 6                    $$$ +12173672880 (217) 367-2880  843.9406
##       location.address1 location.address2 location.address3 location.city
## 1    125 Lincoln Square                                            Urbana
## 2        1003 S Lynn St                                            Urbana
## 3        202 E Green St             Ste 3                       Champaign
## 4 306 W Springfield Ave                                            Urbana
## 5    118 Lincoln Square                                            Urbana
## 6         106 W Main St                                            Urbana
##   location.zip_code location.country location.state
## 1             61801               US             IL
## 2             61801               US             IL
## 3             61820               US             IL
## 4             61801               US             IL
## 5             61801               US             IL
## 6             61801               US             IL
##                     location.display_address       GEOID
## 1       125 Lincoln Square, Urbana, IL 61801 17019011100
## 2           1003 S Lynn St, Urbana, IL 61801 17019005600
## 3 202 E Green St, Ste 3, Champaign, IL 61820 17019000301
## 4    306 W Springfield Ave, Urbana, IL 61801 17019011100
## 5       118 Lincoln Square, Urbana, IL 61801 17019011100
## 6            106 W Main St, Urbana, IL 61801 17019011100
##                                            NAME hhincomeE hhincomeM
## 1  Census Tract 111, Champaign County, Illinois     22741     11621
## 2   Census Tract 56, Champaign County, Illinois     60774     11208
## 3 Census Tract 3.01, Champaign County, Illinois      7099      1978
## 4  Census Tract 111, Champaign County, Illinois     22741     11621
## 5  Census Tract 111, Champaign County, Illinois     22741     11621
## 6  Census Tract 111, Champaign County, Illinois     22741     11621
##                     geometry
## 1 POINT (-88.20729 40.11008)
## 2 POINT (-88.19821 40.10431)
## 3  POINT (-88.2366 40.11037)
## 4 POINT (-88.21127 40.11263)
## 5 POINT (-88.20721 40.11124)
## 6 POINT (-88.20776 40.11276)

Figure 3: Household income of the census tracts in which shops/hotels in Champaign County are located

# Plot each business as a dot coloured by the household income estimate of
# the tract it was joined to
tm_shape(yelp_census) +
  tm_dots(col = "hhincomeE")

Discussion

After tidying the data, the number of businesses dropped drastically because many of the original data points were duplicates. There are 251 businesses in total after data cleaning, whereas there were 3286 before tidying. From Figure 1, I found that the cheapest shops/hotels are usually located in Downtown Champaign. Businesses with a slightly higher price ($$) were mostly aligned with Neil Street, which vertically crosses the City of Champaign. This pattern could be related to household income. Figure 3 shows that Downtown Champaign has lower household income compared to Southern Champaign. Meanwhile, the relationship between ratings and prices is not clear. This might be because the number of businesses is not large enough, or simply because the distribution is random. From Figure 2, the map shows that ratings were likely to be high in the East part of Champaign, but this is also not reliable because the number of businesses is small.

-END-