# Register API credentials from environment variables so that no keys are
# hard-coded in the script.
tidycensus::census_api_key(Sys.getenv("CENSUS_API_KEY"))
google_api_key <- Sys.getenv("GOOGLE_API_KEY")
# Fail fast if the Google key is missing (Sys.getenv() returns "" when unset).
stopifnot(nzchar(google_api_key))

# load RDS from mini assignment one using code from lab 2 for the google pois
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
pois_path <- paste0(
  "C:/Users/jenny/OneDrive - Georgia Institute of Technology/",
  "Desktop/CP8883/Mini 1/google_poi_binghamton.rds"
)
pois <- readRDS(pois_path)

# name of columns in mini 1 to refer back to
names(pois)

# Clean POIs from Mini 1 and convert to sf points
# (Seen in EDA Hands-on exercise)
# purpose = keep unique places, select columns, st_as_sf
poi_clean <- pois %>%
  dplyr::distinct(id, .keep_all = TRUE) %>%
  dplyr::select(
    id, displayName.text, formattedAddress, types,
    location.latitude, location.longitude, rating, userRatingCount
  ) %>%
  # sf::st_as_sf() stops on missing coordinates by default, so drop rows
  # without a latitude/longitude before building the point layer.
  dplyr::filter(!is.na(location.latitude), !is.na(location.longitude))

# POINTS layer in WGS84 (EPSG:4326); x = longitude, y = latitude
poi_points <- sf::st_as_sf(
  poi_clean,
  coords = c("location.longitude", "location.latitude"),
  crs = 4326
)

# Pull ACS tracts for study area (for my mini, I choose Bing's county (Broome)
# + two Albany-area counties (where I'm from))
# (Seen in EDA Hands-on: get_acs twice, output = "wide") -> copied from
# exercise. Here geometry = FALSE; tract shapes are fetched from tigris below.
# looked up exact table numbers on censusbureau website

# from lab, to cache shapefiles
options(tigris_use_cache = TRUE)

# NOTE(review): `state_abb` and `counties` are used below but are not defined
# in the visible part of this script; they must exist before this point.

# 2015 ACS 5-year table for variable B25031_004E, one row per tract
tab_2015 <- tidycensus::get_acs(
  geography = "tract",
  variables = c(median_rent_2br = "B25031_004E"),
  year = 2015,
  survey = "acs5",
  state = state_abb,
  county = counties,
  geometry = FALSE,
  output = "wide"
)

# Same request for the 2023 ACS 5-year release
tab_2023 <- tidycensus::get_acs(
  geography = "tract",
  variables = c(median_rent_2br = "B25031_004E"),
  year = 2023,
  survey = "acs5",
  state = state_abb,
  county = counties,
  geometry = FALSE,
  output = "wide"
)

# tract geometries with tigris
tract_geom <- tigris::tracts(state = state_abb, county = counties, year = 2023)

# join tables to shapes (same mutate/select pattern from EDA exercise)
# NOTE(review): the selects below assume the wide output has a column named
# exactly `median_rent_2br`; confirm with names(tab_2015), since tidycensus
# may suffix estimate columns with "E".
# NOTE(review): 2015 tables and 2023 shapes may use different tract
# boundaries; tracts whose GEOID changed would get NA for the 2015 rent.
tracts <- tract_geom %>%
  dplyr::left_join(
    tab_2015 %>%
      dplyr::select(GEOID, median_rent_2br_2015 = median_rent_2br),
    by = "GEOID"
  ) %>%
  dplyr::left_join(
    tab_2023 %>%
      dplyr::select(GEOID, median_rent_2br_2023 = median_rent_2br),
    by = "GEOID"
  ) %>%
  # positive values = rent increased between the two releases
  dplyr::mutate(
    median_rent_2br_change = median_rent_2br_2023 - median_rent_2br_2015
  )

# rows before/after
cat("Rows (raw):", nrow(pois), "\n")

# remove dupes
poi_clean <- pois %>%
  dplyr::distinct(id, .keep_all = TRUE) %>%
  dplyr::select(
    id, displayName.text, formattedAddress, types,
    location.latitude, location.longitude, rating, userRatingCount
  ) %>%
  # drop missing coords (EDA cleaning)
  dplyr::filter(!is.na(location.latitude), !is.na(location.longitude))

cat("Rows (dedup + non-missing coords):", nrow(poi_clean), "\n")

# POINT sf layer in WGS84 (Seen in EDA exercise - pasted from)
poi_points <- sf::st_as_sf(
  poi_clean,
  coords = c("location.longitude", "location.latitude"),
  crs = 4326
)

# turns each type into own row for organization
# (`types` is a list-column; a place with k types becomes k rows)
poi_types <- tidyr::unnest_longer(poi_clean, types)

# ten most frequent types, most common first
top_types <- poi_types %>%
  dplyr::count(types, sort = TRUE) %>%
  dplyr::slice_head(n = 10)

# Bar chart of top 10 types (EDA style ggplot bar)
# reorder() sorts the bars by count; coord_flip() makes them horizontal.
ggplot2::ggplot(top_types, ggplot2::aes(x = reorder(types, n), y = n)) +
  ggplot2::geom_col() +
  ggplot2::coord_flip() +
  ggplot2::labs(x = "Type", y = "Count", title = "Top POI types")

# make pretty colors (same chart with blue bars)
ggplot2::ggplot(top_types, ggplot2::aes(x = reorder(types, n), y = n)) +
  ggplot2::geom_col(fill = "blue") +
  ggplot2::coord_flip() +
  ggplot2::labs(x = "Type", y = "Count", title = "Top POI types")

# Ratings histogram with colors and formatting
ggplot2::ggplot(poi_clean, ggplot2::aes(rating)) +
  ggplot2::geom_histogram(
    binwidth = 0.2, boundary = 0, fill = "green", color = "blue"
  ) +
  ggplot2::labs(
    x = "Rating", y = "POI count", title = "Distribution of POI ratings"
  )

# Ratings vs number of reviews (log x)
# red, slightly transparent points (alpha = 0.8); point size is left at the
# ggplot2 default
ggplot2::ggplot(poi_clean, ggplot2::aes(userRatingCount, rating)) +
  ggplot2::geom_point(alpha = 0.8, color = "red") +
  ggplot2::scale_x_log10() +
  ggplot2::labs(
    x = "User rating count (log scale)",
    y = "Rating",
    title = "Ratings vs. number of reviews"
  )

# Change-fill choropleth palette
# learned myself from my previous GIS experience
# The leading "-" in "-RdYlBu" reverses the palette direction.
tmap::tm_shape(tracts) +
  tmap::tm_fill("median_rent_2br_change", palette = "-RdYlBu") +
  tmap::tm_borders()

# Color POI dots by rating with a palette used in class lab examples;
# dot size scales with the number of user ratings.
tmap::tm_shape(poi_points) +
  tmap::tm_dots(col = "rating", size = "userRatingCount", palette = "magma")

# RESULTS from mini 2 -- what the lab asks for!

# 1. COMPARE 2 POI'S (PARKS VS MUSEUMS)
# print charts and summarize
# Seen in EDA: unnest list-column, then group_by/summarise
poi_types_long <- tidyr::unnest_longer(poi_clean, types)

# One row per type: number of distinct places, mean rating, mean review count
type_summary <- poi_types_long %>%
  dplyr::filter(types %in% c("park", "museum")) %>%
  dplyr::group_by(types) %>%
  dplyr::summarise(
    n_places = dplyr::n_distinct(id),
    avg_rating = mean(rating, na.rm = TRUE),
    avg_reviews = mean(userRatingCount, na.rm = TRUE)
  ) %>%
  # namespaced like the rest of the script so it works without library(dplyr)
  dplyr::arrange(dplyr::desc(n_places))

print(type_summary)

# quick bar for counts with color (same ggplot pattern as in EDA)
ggplot2::ggplot(
  type_summary,
  ggplot2::aes(x = types, y = n_places, fill = types)
) +
  ggplot2::geom_col() +
  ggplot2::labs(x = "Type", y = "Count", title = "Parks vs. Museums") +
  ggplot2::scale_fill_manual(values = c(park = "blue", museum = "green")) +
  # the x axis already names the types, so the legend is redundant
  ggplot2::theme(legend.position = "none")

# 2. AVERAGE RATING
# mean rating over all cleaned POIs, ignoring places without a rating
avg_rating <- poi_clean %>%
  dplyr::summarise(avg_rating = mean(rating, na.rm = TRUE))

print(avg_rating)

# rating against review count, x axis on a log10 scale
ggplot2::ggplot(poi_clean, ggplot2::aes(userRatingCount, rating)) +
  ggplot2::geom_point(alpha = 0.8, color = "purple") +
  ggplot2::scale_x_log10() +
  ggplot2::labs(
    x = "User rating count (log scale)",
    y = "Rating",
    title = "Ratings vs. number of reviews"
  )

# simple correlation (a coefficient on the raw counts, not a chart);
# use = "complete.obs" drops rows where either value is NA
corr_rr <- with(poi_clean, cor(userRatingCount, rating, use = "complete.obs"))
cat(
  "Correlation between rating and # of reviews (log not applied):",
  round(corr_rr, 3),
  "\n",
  sep = ""
)

# 3. ASSOCIATION BETWEEN PRICE LEVEL AND RATING SCORE
# unsure about this one
# poi_clean is built with a select() that does not keep a price column, so a
# check against poi_clean alone can never succeed. Look in poi_clean first
# (as before) and fall back to the raw `pois` table.
price_candidates <- c("priceLevel", "places.priceLevel")
price_in_clean <- intersect(price_candidates, names(poi_clean))
price_in_raw <- intersect(price_candidates, names(pois))

if (length(price_in_clean) > 0 || length(price_in_raw) > 0) {
  if (length(price_in_clean) > 0) {
    price_col <- price_in_clean[[1]]
    price_source <- poi_clean
  } else {
    price_col <- price_in_raw[[1]]
    # Keep one row per place and only the places that survived cleaning.
    price_source <- pois %>%
      dplyr::distinct(id, .keep_all = TRUE) %>%
      dplyr::semi_join(poi_clean, by = "id")
  }

  # One row per price level: number of places and their mean rating
  price_rating <- price_source %>%
    dplyr::filter(!is.na(.data[[price_col]])) %>%
    dplyr::group_by(priceLevel = .data[[price_col]]) %>%
    dplyr::summarise(
      n = dplyr::n(),
      avg_rate = mean(rating, na.rm = TRUE)
    ) %>%
    # NOTE(review): if price levels are text labels this sorts them
    # alphabetically, not from cheapest to most expensive.
    dplyr::arrange(priceLevel)

  print(price_rating)

  # print() explicitly: plots inside a braced block are not auto-printed when
  # the script is run via source().
  print(
    ggplot2::ggplot(
      price_rating,
      ggplot2::aes(x = factor(priceLevel), y = avg_rate)
    ) +
      ggplot2::geom_col(fill = "purple") +
      ggplot2::labs(
        x = "Price level",
        y = "Average rating",
        title = "Average rating by price level"
      )
  )
} else {
  cat("Price level is not available in this POI dataset, so no price–rating analysis was produced in this mini assignment.")
}

# 4. DO POIS CLUSTER OR EVENLY SPREAD?
# Seen in EDA exercise
# Spatial join in a common projected CRS (EPSG:3857): each tract row is
# repeated once per POI that falls inside it.
tracts_joined <- sf::st_join(
  sf::st_transform(tracts, 3857),
  sf::st_transform(poi_points, 3857)
)

# st_join() is a left join by default, so a tract with no POIs still has one
# row (with NA POI columns). Counting rows would report 1 for those tracts;
# count non-missing POI ids instead so empty tracts get 0.
poi_per_tract <- tracts_joined %>%
  sf::st_drop_geometry() %>%
  dplyr::group_by(GEOID) %>%
  dplyr::summarise(poi_n = sum(!is.na(id)))

# attach the counts to the tract polygons; tracts absent from the count table
# are treated as having zero POIs
tracts_counts <- tracts %>%
  dplyr::left_join(poi_per_tract, by = "GEOID") %>%
  dplyr::mutate(poi_n = tidyr::replace_na(poi_n, 0L))

# map for visual confirmation (same tmap pattern)
tmap::tmap_mode("view")
tmap::tm_shape(tracts_counts) +
  tmap::tm_fill("poi_n", palette = "YlOrRd") +
  tmap::tm_borders() +
  tmap::tm_layout(title = "POIs per tract")

# 5. ONE POI TO VISIT
# Choose: highest rating, break ties by most reviews
poi_pick <- poi_clean %>%
  dplyr::filter(!is.na(rating)) %>%
  dplyr::arrange(dplyr::desc(rating), dplyr::desc(userRatingCount)) %>%
  # first row after sorting = the pick
  dplyr::slice(1) %>%
  dplyr::select(displayName.text, formattedAddress, rating, userRatingCount)

print(poi_pick)

print("This was a fun lab!")

MA_Zhaoxin_Mini2

2025-09-24

Clean POIs from Mini 1 and convert to sf points

(Seen in EDA Hands-on exercise)

Pull ACS tracts for study area (for my mini, I choose Bing’s county (Broome) + two Albany-area counties (where I’m from))

(Seen in EDA Hands-on: get_acs called twice with output = "wide"; copied from the exercise. Note: this script passes geometry = FALSE and gets tract shapes from tigris instead.)

rows before/after

POINT sf layer in WGS84 (Seen in EDA exercise - pasted from)

Bar chart of top 10 types (EDA style ggplot bar)

Ratings histogram with colors and formatting

Ratings vs number of reviews (log x)

Change-fill choropleth palette

quick bar for counts with color (same ggplot pattern as in EDA)

simple correlation chart

map for visual confirmation (same tmap pattern)

MA_Zhaoxin_Mini2

2025-09-24

Clean POIs from Mini 1 and convert to sf points

(Seen in EDA Hands-on exercise)

Pull ACS tracts for study area (for my mini, I choose Bing’s county (Broome) + two Albany-area counties (where I’m from))

(Seen in EDA Hands-on: get_acs called twice with output = "wide"; copied from the exercise. Note: this script passes geometry = FALSE and gets tract shapes from tigris instead.)

rows before/after

POINT sf layer in WGS84 (Seen in EDA exercise - pasted from)

Bar chart of top 10 types (EDA style ggplot bar)

Ratings histogram with colors and formatting

Ratings vs number of reviews (log x)

Change-fill choropleth palette

quick bar for counts with color (same ggplot pattern as in EDA)

simple correlation chart

print share of tracts that have any POIs (a simple clustering indicator)

map for visual confirmation (same tmap pattern)