# Register the Census API key with tidycensus and read the Google API key.
# Both come from environment variables, so no secret is stored in this file.
tidycensus::census_api_key(Sys.getenv("CENSUS_API_KEY"))
google_api_key <- Sys.getenv("GOOGLE_API_KEY")

# Fail early if the Google key is unset or empty.
stopifnot(nzchar(google_api_key))

# Load the Google POIs saved in Mini 1 (readRDS pattern from Lab 2).
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
pois <- readRDS("C:/Users/jenny/OneDrive - Georgia Institute of Technology/Desktop/CP8883/Mini 1/google_poi_binghamton.rds")

# Column names from Mini 1, printed so they can be referred back to.
names(pois)

# Clean POIs from Mini 1 and convert them to sf points

# (Seen in the EDA hands-on exercise.)

# Purpose: keep one row per place, keep only the columns used below, and drop
# rows without coordinates. The coordinate filter matters because
# sf::st_as_sf() stops with an error when a coordinate column contains NA.
poi_clean <- pois %>%
  dplyr::distinct(id, .keep_all = TRUE) %>%
  dplyr::select(
    id,
    displayName.text,
    formattedAddress,
    types,
    location.latitude,
    location.longitude,
    rating,
    userRatingCount
  ) %>%
  dplyr::filter(!is.na(location.latitude), !is.na(location.longitude))

# POINTS layer in WGS84 (EPSG:4326), built from the longitude/latitude columns.
poi_points <- sf::st_as_sf(
  poi_clean,
  coords = c("location.longitude", "location.latitude"),
  crs = 4326
)

# Pull ACS tracts for the study area (for my mini, I chose Binghamton's county (Broome) plus two Albany-area counties, where I'm from)

# (Seen in the EDA hands-on: get_acs called twice with output = "wide"; copied from the exercise. Here geometry = FALSE is used and the tract shapes come from tigris instead.)

# Looked up the exact table numbers on the Census Bureau website.

# From the lab: cache downloaded shapefiles.
options(tigris_use_cache = TRUE)

# Tract-level ACS 5-year pulls of the 2-bedroom median rent variable for 2015
# and 2023, in wide format and without geometry (shapes are fetched
# separately).
# `state_abb` and `counties` are not defined in this part of the script; they
# must already exist when these calls run.
tab_2015 <- tidycensus::get_acs(
  geography = "tract",
  variables = c(median_rent_2br = "B25031_004E"),
  year = 2015,
  survey = "acs5",
  state = state_abb,
  county = counties,
  geometry = FALSE,
  output = "wide"
)

tab_2023 <- tidycensus::get_acs(
  geography = "tract",
  variables = c(median_rent_2br = "B25031_004E"),
  year = 2023,
  survey = "acs5",
  state = state_abb,
  county = counties,
  geometry = FALSE,
  output = "wide"
)

# Tract geometries from tigris for the same state/counties (year = 2023).
tract_geom <- tigris::tracts(state = state_abb, county = counties, year = 2023)

# Join both rent tables to the tract shapes by GEOID and compute the
# 2015 -> 2023 change (same mutate/select pattern as the EDA exercise).
# NOTE(review): assumes get_acs() returned a column literally named
# `median_rent_2br`; wide output may suffix named variables with E/M, so
# confirm with names(tab_2015).
# NOTE(review): 2015 GEOIDs are joined to 2023 tract shapes; tracts whose
# GEOID differs between the two years would get NA here. Confirm.
tracts <- tract_geom %>%
  dplyr::left_join(
    tab_2015 %>%
      dplyr::select(GEOID, median_rent_2br_2015 = median_rent_2br),
    by = "GEOID"
  ) %>%
  dplyr::left_join(
    tab_2023 %>%
      dplyr::select(GEOID, median_rent_2br_2023 = median_rent_2br),
    by = "GEOID"
  ) %>%
  dplyr::mutate(
    median_rent_2br_change = median_rent_2br_2023 - median_rent_2br_2015
  )

# Row counts before/after cleaning

# Report the raw row count; end with a newline so the next message starts on
# its own line.
cat("Rows (raw):", nrow(pois), "\n")

# Remove duplicate places, keep the analysis columns, and drop rows with
# missing coordinates (EDA cleaning).
poi_clean <- pois %>%
  dplyr::distinct(id, .keep_all = TRUE) %>%
  dplyr::select(
    id,
    displayName.text,
    formattedAddress,
    types,
    location.latitude,
    location.longitude,
    rating,
    userRatingCount
  ) %>%
  dplyr::filter(!is.na(location.latitude), !is.na(location.longitude))

cat("Rows (dedup + non-missing coords):", nrow(poi_clean), "\n")

# POINT sf layer in WGS84 (pasted from the EDA exercise)

# Build the point layer from the cleaned POIs; coordinates are
# longitude/latitude in WGS84 (EPSG:4326).
poi_points <- sf::st_as_sf(
  poi_clean,
  coords = c("location.longitude", "location.latitude"),
  crs = 4326
)

# Turn each entry of the `types` column into its own row, so a place with
# several types appears once per type.
poi_types <- tidyr::unnest_longer(poi_clean, types)

# Ten most frequent types, most common first.
top_types <- poi_types %>%
  dplyr::count(types, sort = TRUE) %>%
  dplyr::slice_head(n = 10)

# Bar chart of the top 10 types (EDA-style ggplot bar chart)

# Horizontal bar chart of the top types, ordered by count.
ggplot2::ggplot(top_types, ggplot2::aes(x = reorder(types, n), y = n)) +
  ggplot2::geom_col() +
  ggplot2::coord_flip() +
  ggplot2::labs(x = "Type", y = "Count", title = "Top POI types")

# Same chart with a blue fill.
ggplot2::ggplot(top_types, ggplot2::aes(x = reorder(types, n), y = n)) +
  ggplot2::geom_col(fill = "blue") +
  ggplot2::coord_flip() +
  ggplot2::labs(x = "Type", y = "Count", title = "Top POI types")

# Ratings histogram with colors and formatting

# Histogram of ratings in 0.2-wide bins aligned to 0, green bars with blue
# outlines.
ggplot2::ggplot(poi_clean, ggplot2::aes(rating)) +
  ggplot2::geom_histogram(
    binwidth = 0.2,
    boundary = 0,
    fill = "green",
    color = "blue"
  ) +
  ggplot2::labs(
    x = "Rating",
    y = "POI count",
    title = "Distribution of POI ratings"
  )

# Ratings vs. number of reviews (log-scaled x-axis)

# Scatter of rating against review count with red, slightly transparent
# points (alpha = 0.8); the x-axis is log10-scaled.
ggplot2::ggplot(poi_clean, ggplot2::aes(userRatingCount, rating)) +
  ggplot2::geom_point(alpha = 0.8, color = "red") +
  ggplot2::scale_x_log10() +
  ggplot2::labs(
    x = "User rating count (log scale)",
    y = "Rating",
    title = "Ratings vs. number of reviews"
  )

# Choropleth filled by rent change (reversed RdYlBu palette)

# Learned from my previous GIS experience: fill tracts by the change in
# 2-bedroom median rent using the reversed RdYlBu palette, with tract borders.
tmap::tm_shape(tracts) +
  tmap::tm_fill("median_rent_2br_change", palette = "-RdYlBu") +
  tmap::tm_borders()

# Color POI dots by rating (palette used in the class lab examples) and size
# them by the number of user ratings.
tmap::tm_shape(poi_points) +
  tmap::tm_dots(col = "rating", size = "userRatingCount", palette = "magma")

# RESULTS for Mini 2 -- what the lab asks for

# 1. COMPARE 2 POI TYPES (PARKS VS MUSEUMS)
# Print charts and summarize.
# Seen in EDA: unnest the list-column, then group_by/summarise.
poi_types_long <- tidyr::unnest_longer(poi_clean, types)

# Per type: number of distinct places, mean rating, and mean review count,
# with the more numerous type first.
type_summary <- poi_types_long %>%
  dplyr::filter(types %in% c("park", "museum")) %>%
  dplyr::group_by(types) %>%
  dplyr::summarise(
    n_places = dplyr::n_distinct(id),
    avg_rating = mean(rating, na.rm = TRUE),
    avg_reviews = mean(userRatingCount, na.rm = TRUE)
  ) %>%
  dplyr::arrange(dplyr::desc(n_places))

print(type_summary)

# Quick bar chart of counts with color (same ggplot pattern as in the EDA exercise)

# Bar chart of place counts per type; parks are blue, museums green, and the
# legend is hidden because the x-axis already names each type.
ggplot2::ggplot(
  type_summary,
  ggplot2::aes(x = types, y = n_places, fill = types)
) +
  ggplot2::geom_col() +
  ggplot2::labs(x = "Type", y = "Count", title = "Parks vs. Museums") +
  ggplot2::scale_fill_manual(values = c(park = "blue", museum = "green")) +
  ggplot2::theme(legend.position = "none")

# 2. AVERAGE RATING
# Mean rating across all cleaned POIs, ignoring missing ratings.
avg_rating <- poi_clean %>%
  dplyr::summarise(avg_rating = mean(rating, na.rm = TRUE))

print(avg_rating)

# Rating against review count (log10 x-axis), purple points.
ggplot2::ggplot(poi_clean, ggplot2::aes(userRatingCount, rating)) +
  ggplot2::geom_point(alpha = 0.8, color = "purple") +
  ggplot2::scale_x_log10() +
  ggplot2::labs(
    x = "User rating count (log scale)",
    y = "Rating",
    title = "Ratings vs. number of reviews"
  )

# Simple correlation between review count and rating

# Correlation between raw (not log-transformed) review count and rating,
# using only rows where both values are present.
corr_rr <- with(poi_clean, cor(userRatingCount, rating, use = "complete.obs"))

# Print the rounded coefficient, ending with a newline so later output starts
# on its own line.
cat(
  "Correlation between rating and # of reviews (log not applied):",
  round(corr_rr, 3),
  "\n",
  sep = ""
)

# 3. ASSOCIATION BETWEEN PRICE LEVEL AND RATING SCORE
# poi_clean is built with an explicit dplyr::select() that keeps no price
# column, so testing names(poi_clean) could never find one. Look the column up
# in the raw `pois` table instead, restricted to the places kept in poi_clean
# so the analysis covers the same set of POIs.
price_source <- pois %>%
  dplyr::distinct(id, .keep_all = TRUE) %>%
  dplyr::semi_join(poi_clean, by = "id")

# First matching column name ("priceLevel" takes priority), or NA if neither
# column exists.
price_col <- intersect(
  c("priceLevel", "places.priceLevel"),
  names(price_source)
)[1]

if (!is.na(price_col)) {
  # Number of places and mean rating per price level, skipping places with no
  # price level.
  price_rating <- price_source %>%
    dplyr::filter(!is.na(.data[[price_col]])) %>%
    dplyr::group_by(priceLevel = .data[[price_col]]) %>%
    dplyr::summarise(
      n = dplyr::n(),
      avg_rate = mean(rating, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::arrange(priceLevel)

  print(price_rating)

  # Explicit print() so the chart is drawn even though it sits inside `if`.
  print(
    ggplot2::ggplot(
      price_rating,
      ggplot2::aes(x = factor(priceLevel), y = avg_rate)
    ) +
      ggplot2::geom_col(fill = "purple") +
      ggplot2::labs(
        x = "Price level",
        y = "Average rating",
        title = "Average rating by price level"
      )
  )
} else {
  cat("Price level is not available in this POI dataset, so no price–rating analysis was produced in this mini assignment.")
}

# 4. DO POIS CLUSTER OR EVENLY SPREAD? (seen in the EDA exercise)
# Match each POI to the tract containing it, with both layers in EPSG:3857.
# left = FALSE keeps only tract/POI matches. The default left join would keep
# one all-NA row for every tract without a POI, and counting rows would then
# report 1 POI for those tracts instead of 0.
tracts_joined <- sf::st_join(
  sf::st_transform(tracts, 3857),
  sf::st_transform(poi_points, 3857),
  left = FALSE
)

# One row per tract that has at least one POI, with its POI count.
poi_per_tract <- tracts_joined %>%
  sf::st_drop_geometry() %>%
  dplyr::count(GEOID, name = "poi_n")

# Attach counts to every tract; tracts absent from poi_per_tract get 0.
tracts_counts <- tracts %>%
  dplyr::left_join(poi_per_tract, by = "GEOID") %>%
  dplyr::mutate(poi_n = tidyr::replace_na(poi_n, 0L))

# Map for visual confirmation (same tmap pattern)

# Switch tmap to "view" mode, then map the POI count per tract.
tmap::tmap_mode("view")

tmap::tm_shape(tracts_counts) +
  tmap::tm_fill("poi_n", palette = "YlOrRd") +
  tmap::tm_borders() +
  tmap::tm_layout(title = "POIs per tract")

# 5. ONE POI TO VISIT
# Choose the highest-rated place; break ties by the most reviews.
poi_pick <- poi_clean %>%
  dplyr::filter(!is.na(rating)) %>%
  dplyr::arrange(dplyr::desc(rating), dplyr::desc(userRatingCount)) %>%
  dplyr::slice(1) %>%
  dplyr::select(displayName.text, formattedAddress, rating, userRatingCount)

print(poi_pick)

print("This was a fun lab!")