Hotels and Car Rental businesses in Canton City, Cherokee County, GA

Kaiyu Zhou

2024-09-27

Library packages

Read Yelp data, rds file

# Load the Yelp data stored in yelp_data.rds (relative to the working directory)
yelp_all <- readRDS(file = "yelp_data.rds")

Tidying data

Delete duplicated rows

# Keep one row per Yelp business `id`; `.keep_all = TRUE` retains every other
# column. `TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
yelp_unique <- yelp_all %>%
  distinct(id, .keep_all = TRUE)

# Report how many rows the de-duplication removed
glue::glue("Before dropping duplicated rows, there were {nrow(yelp_all)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>%
  print()
## Before dropping duplicated rows, there were 110 rows. After dropping them, there are 24 rows

Flatten nested columns that have multiple variables in one column

concate_list <- function(x) {
  # x: data frame with columns "alias" and "title", i.e. one element of the
  #    Yelp `categories` list-column
  # Value: the category titles of x joined into one string, separated by ", "
  str_c(x[["title"]], collapse = ", ")
}

yelp_flat <- yelp_unique %>%
  # Step 1: spread nested data-frame columns into top-level columns
  jsonlite::flatten() %>%
  # Step 2: collapse each list-column into one comma-separated string per row
  mutate(
    transactions = map_chr(
      transactions,
      function(parts) str_c(parts, collapse = ", ")
    ),
    location.display_address = map_chr(
      location.display_address,
      function(parts) str_c(parts, collapse = ", ")
    ),
    categories = map_chr(categories, concate_list)
  )

Missing values

# Number of NA values in each column of the flattened data
map_dbl(yelp_flat, function(column) sum(is.na(column)))
##                              id                           alias 
##                               0                               0 
##                            name                       image_url 
##                               0                               0 
##                       is_closed                             url 
##                               0                               0 
##                    review_count                      categories 
##                               0                               0 
##                          rating                    transactions 
##                               0                               0 
##                           phone                   display_phone 
##                               0                               0 
##                        distance                  business_hours 
##                               0                               0 
##                           price            coordinates.latitude 
##                              21                               0 
##           coordinates.longitude               location.address1 
##                               0                               3 
##               location.address2               location.address3 
##                              14                              13 
##                   location.city               location.zip_code 
##                               0                               0 
##                location.country                  location.state 
##                               0                               0 
##        location.display_address attributes.business_temp_closed 
##                               0                              24 
## attributes.waitlist_reservation 
##                              24

Drop missing

# Check that latitude and longitude are missing in exactly the same rows
identical(
  is.na(yelp_flat[["coordinates.latitude"]]),
  is.na(yelp_flat[["coordinates.longitude"]])
)
## [1] TRUE
# Remove rows with a missing `coordinates.longitude` or `coordinates.latitude`
yelp_dropna <- drop_na(
  yelp_flat,
  coordinates.longitude, coordinates.latitude
)

# Row counts before and after dropping rows without coordinates
paste0("Before: ", nrow(yelp_flat)) %>% print()
## [1] "Before: 24"
paste0("After: ", nrow(yelp_dropna)) %>% print()
## [1] "After: 24"

Remove points outside the city boundary

# Fetch the Georgia place boundaries, keep the place named "Canton",
# and transform it to CRS 4326
canton <- tigris::places(state = "GA", progress_bar = FALSE) %>%
  filter(NAME == "Canton") %>%
  st_transform(crs = 4326)
## Retrieving data for the year 2022
# Turn the longitude/latitude columns into point geometries (CRS 4326)
yelp_sf <- st_as_sf(
  yelp_dropna,
  coords = c("coordinates.longitude", "coordinates.latitude"),
  crs = 4326
)

# Spatial subset: keep only the points selected by the Canton polygon
yelp_in <- yelp_sf[canton, ]

# Row counts before and after clipping to the city boundary
paste0("Before: ", nrow(yelp_sf)) %>% print()
## [1] "Before: 24"
paste0("After: ", nrow(yelp_in)) %>% print()
## [1] "After: 15"
# Label each business by type. "Car Rental" is tested first; rows whose
# categories match neither pattern get NA.
yelp_in <- mutate(
  yelp_in,
  category_type = case_when(
    str_detect(categories, "Car Rental") ~ "Car Rental",
    str_detect(categories, "Hotels") ~ "Hotels"
  )
)

Save yelp_in

# Write the in-city businesses to an RDS file at the path returned by here()
saveRDS(yelp_in, file = here("yelp_data_in_canton.rds"))

Read canton city rds

# Load the saved tract and city objects from RDS files in the working directory
tract_canton <- readRDS(file = "tract_canton.rds")
canton <- readRDS(file = "canton.rds")

Spatial join data

# Put all three layers in the same CRS (4326) before joining
canton <- st_transform(canton, crs = 4326)
tract_canton <- st_transform(tract_canton, crs = 4326)
yelp_in <- st_transform(yelp_in, crs = 4326)

# Tract-level join: attach the Yelp columns of intersecting businesses to
# each tract, then preview the first rows
census_yelp <- st_join(x = tract_canton, y = yelp_in, join = st_intersects)
head(census_yelp)
## Simple feature collection with 6 features and 30 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.59237 ymin: 34.13413 xmax: -84.39161 ymax: 34.38819
## Geodetic CRS:  WGS 84
##          GEOID                                          NAME hhincomeE
## 2  13057090707 Census Tract 907.07, Cherokee County, Georgia     85684
## 10 13057090703 Census Tract 907.03, Cherokee County, Georgia     43647
## 16 13057090301 Census Tract 903.01, Cherokee County, Georgia    126250
## 17 13057090603 Census Tract 906.03, Cherokee County, Georgia     86563
## 20 13057090102 Census Tract 901.02, Cherokee County, Georgia     75840
## 24 13057090710 Census Tract 907.10, Cherokee County, Georgia    100893
##    hhincomeM   id alias name image_url is_closed  url review_count categories
## 2      15839 <NA>  <NA> <NA>      <NA>        NA <NA>           NA       <NA>
## 10     13022 <NA>  <NA> <NA>      <NA>        NA <NA>           NA       <NA>
## 16     32291 <NA>  <NA> <NA>      <NA>        NA <NA>           NA       <NA>
## 17     23368 <NA>  <NA> <NA>      <NA>        NA <NA>           NA       <NA>
## 20      9309 <NA>  <NA> <NA>      <NA>        NA <NA>           NA       <NA>
## 24      5307 <NA>  <NA> <NA>      <NA>        NA <NA>           NA       <NA>
##    rating transactions phone display_phone distance business_hours price
## 2      NA         <NA>  <NA>          <NA>       NA           NULL  <NA>
## 10     NA         <NA>  <NA>          <NA>       NA           NULL  <NA>
## 16     NA         <NA>  <NA>          <NA>       NA           NULL  <NA>
## 17     NA         <NA>  <NA>          <NA>       NA           NULL  <NA>
## 20     NA         <NA>  <NA>          <NA>       NA           NULL  <NA>
## 24     NA         <NA>  <NA>          <NA>       NA           NULL  <NA>
##    location.address1 location.address2 location.address3 location.city
## 2               <NA>              <NA>              <NA>          <NA>
## 10              <NA>              <NA>              <NA>          <NA>
## 16              <NA>              <NA>              <NA>          <NA>
## 17              <NA>              <NA>              <NA>          <NA>
## 20              <NA>              <NA>              <NA>          <NA>
## 24              <NA>              <NA>              <NA>          <NA>
##    location.zip_code location.country location.state location.display_address
## 2               <NA>             <NA>           <NA>                     <NA>
## 10              <NA>             <NA>           <NA>                     <NA>
## 16              <NA>             <NA>           <NA>                     <NA>
## 17              <NA>             <NA>           <NA>                     <NA>
## 20              <NA>             <NA>           <NA>                     <NA>
## 24              <NA>             <NA>           <NA>                     <NA>
##    attributes.business_temp_closed attributes.waitlist_reservation
## 2                               NA                              NA
## 10                              NA                              NA
## 16                              NA                              NA
## 17                              NA                              NA
## 20                              NA                              NA
## 24                              NA                              NA
##    category_type                       geometry
## 2           <NA> MULTIPOLYGON (((-84.52836 3...
## 10          <NA> MULTIPOLYGON (((-84.51658 3...
## 16          <NA> MULTIPOLYGON (((-84.59071 3...
## 17          <NA> MULTIPOLYGON (((-84.48169 3...
## 20          <NA> MULTIPOLYGON (((-84.49538 3...
## 24          <NA> MULTIPOLYGON (((-84.55758 3...
# Business-level join: attach the columns of the intersecting census tract
# to each Yelp point
yelp_census <- st_join(x = yelp_in, y = tract_canton, join = st_intersects)
yelp_census
## Simple feature collection with 15 features and 30 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.50495 ymin: 34.22241 xmax: -84.4608 ymax: 34.25922
## Geodetic CRS:  WGS 84
## First 10 features:
##                        id
## 10 Zo6haDZ-Rqp7ndDcqggZ0A
## 11 x_OWVk-E9TaZb0kaM4tkjg
## 12 A0VbxD6eVzuF0gId7Twd1g
## 13 SIlkALXHW60N32RqsEwhiQ
## 14 Gg60QjMkxds2C_IneLB3Tg
## 15 9aOx5PrQI4PloEqPTalCRA
## 16 aNJHteN5VjN6yJqasZlvDQ
## 17 bhl2MHAx3y4BiCUoQjQEcQ
## 18 4Qxv1uHOxiLQuj3ojAeeyw
## 19 P-nmWiDeLnd-fOHkFG2f5g
##                                                        alias
## 10                         hampton-inn-atlanta-canton-canton
## 11 fairfield-inn-and-suites-canton-riverstone-parkway-canton
## 12                                          motel-6-canton-3
## 13           holiday-inn-express-and-suites-canton-canton-11
## 14                         days-inn-by-wyndham-canton-canton
## 15       country-inn-and-suites-by-radisson-canton-ga-canton
## 16                             comfort-inn-and-suites-canton
## 17                           quality-inn-and-suites-canton-6
## 18                                      homestead-inn-canton
## 19                            microtel-inn-and-suites-canton
##                                                name
## 10                       Hampton Inn Atlanta-Canton
## 11 Fairfield Inn & Suites Canton Riverstone Parkway
## 12                                          Motel 6
## 13              Holiday Inn Express & Suites Canton
## 14                       Days Inn by Wyndham Canton
## 15     Country Inn & Suites by Radisson, Canton, GA
## 16                             Comfort Inn & Suites
## 17                             Quality Inn & Suites
## 18                                    Homestead Inn
## 19                          Microtel Inn and Suites
##                                                               image_url
## 10 https://s3-media2.fl.yelpcdn.com/bphoto/IW-NmR7Y1cPEMakTf1fX0Q/o.jpg
## 11 https://s3-media1.fl.yelpcdn.com/bphoto/xA1UJo7SRah0LkC6-uB9uA/o.jpg
## 12 https://s3-media2.fl.yelpcdn.com/bphoto/P0TlJXC1z8_b2ErtG5l5-g/o.jpg
## 13 https://s3-media3.fl.yelpcdn.com/bphoto/tcoCEgRWmBRAuYV7_5oaqw/o.jpg
## 14 https://s3-media4.fl.yelpcdn.com/bphoto/v8bzjqYT-qcm__MkrB-BIA/o.jpg
## 15                                                                     
## 16 https://s3-media1.fl.yelpcdn.com/bphoto/Moqniq7zuV-QzjgutLOjow/o.jpg
## 17 https://s3-media2.fl.yelpcdn.com/bphoto/qR4zvIRY_S-vmxWQMZcv8g/o.jpg
## 18                                                                     
## 19                                                                     
##    is_closed
## 10     FALSE
## 11     FALSE
## 12     FALSE
## 13     FALSE
## 14     FALSE
## 15     FALSE
## 16     FALSE
## 17     FALSE
## 18     FALSE
## 19     FALSE
##                                                                                                                                                                                                                       url
## 10                         https://www.yelp.com/biz/hampton-inn-atlanta-canton-canton?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 11 https://www.yelp.com/biz/fairfield-inn-and-suites-canton-riverstone-parkway-canton?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 12                                          https://www.yelp.com/biz/motel-6-canton-3?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 13           https://www.yelp.com/biz/holiday-inn-express-and-suites-canton-canton-11?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 14                         https://www.yelp.com/biz/days-inn-by-wyndham-canton-canton?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 15       https://www.yelp.com/biz/country-inn-and-suites-by-radisson-canton-ga-canton?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 16                             https://www.yelp.com/biz/comfort-inn-and-suites-canton?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 17                           https://www.yelp.com/biz/quality-inn-and-suites-canton-6?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 18                                      https://www.yelp.com/biz/homestead-inn-canton?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
## 19                            https://www.yelp.com/biz/microtel-inn-and-suites-canton?adjust_creative=yhocquUnsSVW3lwquDGWlQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=yhocquUnsSVW3lwquDGWlQ
##    review_count                    categories rating transactions        phone
## 10           27                        Hotels    3.4              +17703457400
## 11            1 Hotels, Venues & Event Spaces    5.0              +18558166193
## 12           17                        Hotels    1.8              +17703458700
## 13            3 Venues & Event Spaces, Hotels    4.3              +16783291140
## 14           10                        Hotels    2.6              +18003291073
## 15            8                        Hotels    2.4              +14707612019
## 16            2                        Hotels    1.0              +17704797300
## 17            2                        Hotels    1.0              +17703451994
## 18            1            Hotels, Apartments    1.0              +17707200888
## 19            2                        Hotels    1.0              +17703458700
##     display_phone distance
## 10 (770) 345-7400 4937.944
## 11 (855) 816-6193 5368.347
## 12 (770) 345-8700 4972.966
## 13 (678) 329-1140 5273.145
## 14 (800) 329-1073 5297.867
## 15 (470) 761-2019 4979.686
## 16 (770) 479-7300 4930.712
## 17 (770) 345-1994 4712.304
## 18 (770) 720-0888 5146.432
## 19 (770) 345-8700 4972.966
##                                                                                                                                                             business_hours
## 10        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0, 1, 2, 3, 4, 5, 6, REGULAR, TRUE
## 11        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0, 1, 2, 3, 4, 5, 6, REGULAR, TRUE
## 12        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0, 1, 2, 3, 4, 5, 6, REGULAR, TRUE
## 13                                                                                                                                                                    NULL
## 14 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 0900, 0900, 0900, 0900, 0900, 0900, 0900, 1700, 1700, 1700, 1700, 1700, 1700, 1700, 0, 1, 2, 3, 4, 5, 6, REGULAR, TRUE
## 15        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0, 1, 2, 3, 4, 5, 6, REGULAR, TRUE
## 16        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0, 1, 2, 3, 4, 5, 6, REGULAR, TRUE
## 17        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0, 1, 2, 3, 4, 5, 6, REGULAR, TRUE
## 18                                                                                                                                                                    NULL
## 19                                                                                                                                                                    NULL
##    price          location.address1 location.address2 location.address3
## 10    $$            710 Transit Ave              <NA>              <NA>
## 11  <NA> 120 Reinhardt College Pkwy              <NA>              <NA>
## 12    $$        114 Riverpoint Pkwy                                <NA>
## 13  <NA>         145 Park Center Dr              <NA>              <NA>
## 14    $$         101 Juniper Street                                    
## 15  <NA>            705 Transit Ave                                <NA>
## 16  <NA>         713 Transit Avenue              <NA>              <NA>
## 17  <NA>            138 Keith Drive              <NA>              <NA>
## 18  <NA>       1615 Ball Ground Hwy                                    
## 19  <NA>      114 River Pointe Pkwy              <NA>              <NA>
##    location.city location.zip_code location.country location.state
## 10        Canton             30114               US             GA
## 11        Canton             30114               US             GA
## 12        Canton             30114               US             GA
## 13        Canton             30114               US             GA
## 14        Canton             30114               US             GA
## 15        Canton             30114               US             GA
## 16        Canton             30114               US             GA
## 17        Canton             30114               US             GA
## 18        Canton             30114               US             GA
## 19        Canton             30114               US             GA
##                        location.display_address attributes.business_temp_closed
## 10            710 Transit Ave, Canton, GA 30114                              NA
## 11 120 Reinhardt College Pkwy, Canton, GA 30114                              NA
## 12        114 Riverpoint Pkwy, Canton, GA 30114                              NA
## 13         145 Park Center Dr, Canton, GA 30114                              NA
## 14         101 Juniper Street, Canton, GA 30114                              NA
## 15            705 Transit Ave, Canton, GA 30114                              NA
## 16         713 Transit Avenue, Canton, GA 30114                              NA
## 17            138 Keith Drive, Canton, GA 30114                              NA
## 18       1615 Ball Ground Hwy, Canton, GA 30114                              NA
## 19      114 River Pointe Pkwy, Canton, GA 30114                              NA
##    attributes.waitlist_reservation category_type       GEOID
## 10                              NA        Hotels 13057090402
## 11                              NA        Hotels 13057090401
## 12                              NA        Hotels 13057090402
## 13                              NA        Hotels 13057090401
## 14                              NA        Hotels 13057090401
## 15                              NA        Hotels 13057090402
## 16                              NA        Hotels 13057090402
## 17                              NA        Hotels 13057090402
## 18                              NA        Hotels 13057090402
## 19                              NA        Hotels 13057090402
##                                             NAME hhincomeE hhincomeM
## 10 Census Tract 904.02, Cherokee County, Georgia     75507     16083
## 11 Census Tract 904.01, Cherokee County, Georgia     42898     11295
## 12 Census Tract 904.02, Cherokee County, Georgia     75507     16083
## 13 Census Tract 904.01, Cherokee County, Georgia     42898     11295
## 14 Census Tract 904.01, Cherokee County, Georgia     42898     11295
## 15 Census Tract 904.02, Cherokee County, Georgia     75507     16083
## 16 Census Tract 904.02, Cherokee County, Georgia     75507     16083
## 17 Census Tract 904.02, Cherokee County, Georgia     75507     16083
## 18 Census Tract 904.02, Cherokee County, Georgia     75507     16083
## 19 Census Tract 904.02, Cherokee County, Georgia     75507     16083
##                      geometry
## 10 POINT (-84.47032 34.25411)
## 11 POINT (-84.48037 34.25348)
## 12  POINT (-84.4608 34.25818)
## 13 POINT (-84.47911 34.25257)
## 14 POINT (-84.48716 34.24516)
## 15 POINT (-84.47073 34.25462)
## 16 POINT (-84.46961 34.25473)
## 17 POINT (-84.46423 34.25514)
## 18 POINT (-84.46497 34.25922)
## 19  POINT (-84.46228 34.2585)
# Visualize census data using information from Yelp:
# average `rating` over the rows of each tract (grouped by GEOID) and
# color the tract polygons by that mean.
tm_shape(census_yelp %>% group_by(GEOID) %>% summarise(rating=mean(rating))) + 
  tm_polygons(col = "rating")

Findings:

Rating score and Review Count

The most frequent rating score is 1. There is a weak positive correlation of 0.32 between rating score and review count, which indicates that rating score is not a strong predictor of review count.

# Tabulate how many businesses have each rating value
review_counts <- table(yelp_in$rating) %>% as.data.frame()
review_counts
##    Var1 Freq
## 1     0    1
## 2     1    6
## 3   1.7    1
## 4   1.8    1
## 5   2.4    1
## 6   2.6    1
## 7   2.9    1
## 8   3.4    1
## 9   4.3    1
## 10    5    1
# Correlation between rating and review count, using complete cases only
correlation <- cor(yelp_in$rating, yelp_in$review_count, use = "complete.obs")

# Review count (y) against rating (x), drawn as solid blue points
plot(
  x = yelp_in$rating,
  y = yelp_in$review_count,
  main = "Scatter Plot of Rating vs. Review Count",
  xlab = "Rating Score",
  ylab = "Review Count",
  pch = 16,
  col = "blue"
)

# Overlay the fitted line of review_count regressed on rating
abline(lm(review_count ~ rating, data = yelp_in), col = "red", lwd = 2)

# Write the rounded correlation at (minimum rating, maximum review count)
text(
  x = min(yelp_in$rating),
  y = max(yelp_in$review_count),
  labels = paste("Correlation: ", round(correlation, 2)),
  pos = 4,
  col = "darkgreen"
)

Price

Only 3 businesses have a price value, and all of them are labeled as $$

# Frequency of each price label (table() leaves out NA prices by default)
price_counts <- table(yelp_in$price) %>% as.data.frame()
price_counts
##   Var1 Freq
## 1   $$    3

Household income and rating

There is a weak negative relationship between household income and Yelp rating. The household income of residents living in the area does not significantly influence the ratings given to hotels or car rental businesses. Hotels and car rentals are services predominantly used by non-residents such as tourists, business travelers, or other visitors. As a result, the ratings for these businesses are more reflective of the experiences and perceptions of these visitors rather than the local population.

# Uses `yelp_census` (Yelp points joined to census tracts): `rating` is the
# Yelp rating and `hhincomeE` is the household income column of the tract
# joined to each business.

# Scatter plot to see the pattern: income (y) against rating (x)
plot(yelp_census$rating, yelp_census$hhincomeE,
     main = "Scatter Plot of Household Income vs. Yelp Rating",
     xlab = "Yelp Rating",
     ylab = "Household Income",
     pch = 16, col = "blue")

# Add a linear regression line
abline(lm(hhincomeE ~ rating, data = yelp_census), col = "red", lwd = 2)

# Calculate correlation coefficient on complete cases only
income_rating_correlation <- cor(yelp_census$rating, yelp_census$hhincomeE, use = "complete.obs")

# Place the label 1.2 rating units left of the highest rating, at the highest
# income. na.rm = TRUE keeps the position defined when a business has no
# income value, consistent with use = "complete.obs" above.
text(max(yelp_census$rating, na.rm = TRUE) - 1.2,
     max(yelp_census$hhincomeE, na.rm = TRUE),
     paste("Correlation: ", round(income_rating_correlation, 2)), pos = 4, col = "darkgreen")