Mini Assignment 2

Import Libraries

library(tidycensus)
library(sf)
library(tmap)
library(tidyverse)
library(here)
library(knitr)
library(kableExtra)
library(glue)
library(tigris)

1. Import my data

# Load the raw POI table from the RDS file; `here::here()` resolves the path
# relative to the project root.
data_all <- here::here("ithaca_poi_data_original.rds") %>%
  readRDS()

#2. Tidy my data

##2.1. Remove duplicated rows

# Keep a single row per unique `places.id`; `.keep_all = TRUE` retains every
# other column alongside the id.
data_all_unique <- distinct(data_all, places.id, .keep_all = TRUE)

# Report the row counts before and after de-duplication.
glue::glue("Before dropping duplicated rows, there were {nrow(data_all)} rows. After dropping them, there are {nrow(data_all_unique)} rows.")
## Before dropping duplicated rows, there were 103 rows. After dropping them, there are 18 rows.

##2.2. Flatten/unnest list-columns

# Collapse each element of the `places.types` list-column into one
# comma-separated string so the column becomes a plain character vector.
poi_flat <- data_all_unique %>%
  mutate(
    places.types = map_chr(places.types, function(types) {
      str_c(types, collapse = ",")
    })
  )

# Move `places.reviews` into its own table keyed by `places.id`:
# `unnest_longer()` gives one row per review, then `unnest_wider()` spreads
# the fields of each review into separate columns.
reviews <- poi_flat %>%
  select(places.id, places.reviews) %>%
  unnest_longer(places.reviews) %>%
  unnest_wider(places.reviews)

# The reviews now live in `reviews`, so drop the list-column from the main
# POI table.
poi_flat <- select(poi_flat, -places.reviews)

##2.3. Handle missing values

I first verified that the “rating” column is complete (no missing values). To analyze whether ratings vary with price, I then restricted the sample to POIs with a non-missing “priceLevel”. Next, I used “dineIn” as a proxy for restaurant service and excluded rows where “dineIn” is missing (treating those as not proper restaurants). Finally, because Cornell students frequently pick up food between classes, I further limited the sample to venues that offer takeout.

# Keep only rows where the price-level, dine-in and takeout columns are all
# non-missing (three columns are checked here).
# NOTE(review): this removes NA rows only; rows where `places.takeout` is
# FALSE are kept. Confirm that matches the intended "offers takeout" filter.
poi_dropna <- drop_na(
  poi_flat,
  places.priceLevel,
  places.dineIn,
  places.takeout
)

# Report the row counts before and after removing incomplete rows.
glue::glue("Before dropping missing value rows, there were {nrow(poi_flat)} rows. After dropping them, there are {nrow(poi_dropna)} rows.")
## Before dropping missing value rows, there were 18 rows. After dropping them, there are 14 rows.

##2.4. Filter by location

# Boundary used for the location filter: fetch New York place boundaries with
# tigris, keep the feature(s) whose NAME is "Ithaca", and reproject to
# EPSG:4326 so it shares a CRS with the POI points.
ithaca <- tigris::places(state = "NY") %>%
  filter(NAME == "Ithaca") %>%
  st_transform(crs = 4326)

# Turn the POI table into an sf point layer built from the longitude and
# latitude columns (EPSG:4326).
poi_sf <- st_as_sf(
  poi_dropna,
  coords = c("places.location.longitude", "places.location.latitude"),
  crs = 4326
)

# Keep only the POIs that intersect the Ithaca boundary.
poi_sf_in <- st_filter(poi_sf, ithaca)

# Report the row counts before and after the spatial filter.
glue::glue("Before dropping POIs outside of Ithaca, there were {nrow(poi_sf)} rows. After dropping them, there are {nrow(poi_sf_in)} rows.")
## Before dropping POIs outside of Ithaca, there were 14 rows. After dropping them, there are 12 rows.

Show my cleaned POI data

## Remove columns that are not needed for the report: the display-name
## language code and the review-summary fields.
poi_clean <- poi_sf_in %>%
  select(-c(
    places.displayName.languageCode,
    places.reviewSummary.flagContentUri,
    places.reviewSummary.reviewsUri,
    places.reviewSummary.text.text,
    places.reviewSummary.text.languageCode,
    places.reviewSummary.disclosureText.text,
    places.reviewSummary.disclosureText.languageCode
  ))

## Render the first 10 rows as a styled table that does not span the full
## page width.
head(poi_clean, n = 10) %>%
  kableExtra::kable() %>%
  kableExtra::kable_styling(full_width = FALSE)
places.id places.types places.formattedAddress places.rating places.priceLevel places.userRatingCount places.takeout places.dineIn places.delivery places.displayName.text geometry
ChIJd5TQDg6B0IkRtBMqZ4QOWyo chinese_restaurant,japanese_restaurant,restaurant,point_of_interest,food,establishment 106 Fairgrounds Memorial Pkwy, Ithaca, NY 14850, USA 3.9 PRICE_LEVEL_INEXPENSIVE 1204 TRUE TRUE NA Spring Buffet POINT (-76.51203 42.43081)
ChIJaeZOwqCB0IkRn2nA111V5-A restaurant,sushi_restaurant,ramen_restaurant,american_restaurant,japanese_restaurant,point_of_interest,food,establishment 722 S Meadow St Suit 900, Ithaca, NY 14850, USA 4.8 PRICE_LEVEL_MODERATE 151 TRUE TRUE TRUE Pokelava Poke Bowl - Downtown POINT (-76.50925 42.4308)
ChIJx_f-hbGB0IkRZKH7lb_57PI sushi_restaurant,ramen_restaurant,tea_house,japanese_restaurant,restaurant,point_of_interest,food,establishment 740 S Meadow St, Ithaca, NY 14850, USA 4.2 PRICE_LEVEL_INEXPENSIVE 245 TRUE TRUE TRUE Taichi Bubble Tea - Ithaca POINT (-76.50849 42.4301)
ChIJlYR7IGiB0IkRbvPxgZHICPE ramen_restaurant,asian_restaurant,korean_restaurant,japanese_restaurant,restaurant,point_of_interest,food,establishment 512 W State St, Ithaca, NY 14850, USA 4.6 PRICE_LEVEL_MODERATE 927 TRUE TRUE TRUE Maru Ramen POINT (-76.5068 42.4395)
ChIJm-ch1pCB0IkRYkLCcVuEr_g korean_restaurant,asian_restaurant,restaurant,point_of_interest,food,establishment 124 E State St, Ithaca, NY 14850, USA 4.3 PRICE_LEVEL_MODERATE 261 TRUE TRUE TRUE Kimchi POINT (-76.49808 42.43981)
ChIJ_yx8Eo6B0IkRRfLPVFdcLPU korean_restaurant,restaurant,point_of_interest,food,establishment 147 Dryden Rd, Ithaca, NY 14850, USA 4.1 PRICE_LEVEL_MODERATE 118 TRUE TRUE TRUE So Poong POINT (-76.48597 42.44146)
ChIJT587Po6B0IkRiE2s9_l8u1c japanese_restaurant,sushi_restaurant,asian_restaurant,restaurant,point_of_interest,food,establishment 113 Dryden Rd, Ithaca, NY 14850, USA 3.7 PRICE_LEVEL_MODERATE 152 TRUE TRUE TRUE Plum Tree Restaurant POINT (-76.48656 42.44155)
ChIJH0xAa46B0IkRzZXALva_JXc korean_restaurant,chinese_restaurant,restaurant,point_of_interest,food,establishment 104 Dryden Rd, Ithaca, NY 14850, USA 3.9 PRICE_LEVEL_MODERATE 82 TRUE TRUE TRUE Gangnam Station POINT (-76.48639 42.44174)
ChIJrUH84Y2B0IkRVl6DSs39uPA korean_restaurant,asian_restaurant,restaurant,point_of_interest,food,establishment 321 College Ave, Ithaca, NY 14850, USA 4.0 PRICE_LEVEL_MODERATE 274 TRUE TRUE TRUE Koko POINT (-76.48503 42.44131)
ChIJo8Lj3o2B0IkRM5Lp3TT2GRs japanese_restaurant,restaurant,point_of_interest,food,establishment 401 College Ave, Ithaca, NY 14850, USA 4.3 PRICE_LEVEL_INEXPENSIVE 220 TRUE TRUE TRUE Oishii Bowl POINT (-76.48512 42.44172)

Explore and report findings

# Switch tmap to its interactive "view" mode for the maps that follow.
tmap_mode("view")

# Map (Korean, Japanese restaurants)
# Label every POI with a cuisine based on its collapsed type string. The
# conditions are evaluated in order, so a POI listing both types is labelled
# "Japanese".
poi_viz <- poi_clean %>%
  mutate(
    cuisine = case_when(
      str_detect(places.types, "japanese_restaurant") ~ "Japanese",
      str_detect(places.types, "korean_restaurant") ~ "Korean",
      TRUE ~ "Other"
    )
  )

# Dots coloured by cuisine, drawn together with the city boundary.
# NOTE(review): the palette names only "Japanese" and "Korean"; any POI that
# falls into "Other" has no explicit colour. Confirm none do.
tm_shape(poi_viz) +
  tm_dots(
    col = "cuisine",
    palette = c("Japanese" = "red", "Korean" = "blue"),
    size = 0.7,
    popup.vars = c(
      "Name" = "places.displayName.text",
      "Rating" = "places.rating",
      "Rating Count" = "places.userRatingCount"
    )
  ) +
  tm_shape(ithaca) +
  tm_borders()

It appears that Korean restaurants are clustered near Cornell University, on the east side of Ithaca. Interestingly, there are none in the southwest part of the city, which is the main grocery and shopping area. Since that area is not part of students’ everyday routines, Korean restaurant owners may have chosen to locate closer to where students spend most of their time, ensuring greater exposure in daily life. By contrast, three Japanese restaurants are situated in the southwest area. This might reflect that Japanese food is somewhat more familiar to the general public in Ithaca, making it reasonable for restaurants to locate near markets.

# Map: rating (dot colour, "magma" palette) against number of user ratings
# (dot size), drawn together with the city boundary.
tm_shape(poi_clean) +
  tm_dots(
    size = "places.userRatingCount",
    col = "places.rating",
    palette = "magma",
    popup.vars = c(
      "Name" = "places.displayName.text",
      "Rating" = "places.rating",
      "Rating Count" = "places.userRatingCount"
    )
  ) +
  tm_shape(ithaca) +
  tm_borders()

Among the restaurants, Maru Ramen in the Downtown area stood out with both a high rating and a large number of customer reviews. In contrast, most restaurants in the eastern part of the city had fewer reviews and ratings below 4.2. Overall, there seems to be a rough pattern where a larger number of reviews corresponds to higher ratings, suggesting that satisfied customers may be more inclined to leave feedback.

# Map: rating (dot colour, "magma" palette) against price level (dot size),
# drawn together with the city boundary.
#
# `places.priceLevel` holds enum strings such as "PRICE_LEVEL_MODERATE", which
# carry no magnitude for a size aesthetic. Convert them to an ordered rank
# (1 = free ... 5 = very expensive) and size the dots by that rank instead.
# Values not listed below become NA.
price_levels <- c(
  "PRICE_LEVEL_FREE",
  "PRICE_LEVEL_INEXPENSIVE",
  "PRICE_LEVEL_MODERATE",
  "PRICE_LEVEL_EXPENSIVE",
  "PRICE_LEVEL_VERY_EXPENSIVE"
)

poi_price <- poi_clean %>%
  mutate(price_rank = match(places.priceLevel, price_levels))

# NOTE(review): the former `size.scale = 0.1` was removed. It does not appear
# to be a valid value for that argument (tmap v4 expects a `tm_scale_*()`
# object there); re-add an explicit size scaling if smaller dots are wanted.
tm_shape(poi_price) +
  tm_dots(
    col = "places.rating",
    size = "price_rank",
    palette = "magma",
    popup.vars = c(
      "Name" = "places.displayName.text",
      "Rating" = "places.rating",
      "Rating Count" = "places.userRatingCount",
      "Price Level" = "places.priceLevel"
    )
  ) +
  tm_shape(ithaca) +
  tm_borders()

Overall, the price levels of restaurants are fairly similar. However, those in the southwest area tend to be slightly less expensive. There are two restaurants with ratings above 4.6: one located in the southwest with a lower price level, and another in the east with a higher price level. Given the wide range of ratings in the eastern area, there does not appear to be a clear relationship between restaurant price level and rating.