#Import necessary packages
library(tidytransit)
library(tidyverse)
library(tidycensus)
library(tidygraph)
library(dplyr)
library(sf)
library(osmdata)
library(sfnetworks)
library(nominatimlite)
library(leaflet)
library(ggplot2)
# TASK ////////////////////////////////////////////////////////////////////////
# Download MARTA GTFS data and assign it to `gtfs` object
gtfs <- read_gtfs("https://www.itsmarta.com/google_transit_feed/google_transit.zip")
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Edit stop_name to append serial numbers (1, 2, etc.) to remove duplicate names
stop_dist <- stop_group_distances(gtfs$stops, by='stop_name') %>%
filter(dist_max > 200)
gtfs$stops <- gtfs$stops %>%
group_by(stop_name) %>%
mutate(stop_name = case_when(stop_name %in% stop_dist$stop_name ~ paste0(stop_name, " (", seq(1,n()), ")"),
TRUE ~ stop_name))
# Create a transfer table
gtfs <- gtfsrouter::gtfs_transfer_table(gtfs,
d_limit = 200,
min_transfer_time = 120)
# NOTE: Converting to sf format uses stop_lat and stop_lon columns contained in gtfs$stops.
# In the conversion process, stop_lat and stop_lon are converted into a geometry column, and
# the output sf object do not have the lat lon column anymore.
# But many other functions in tidytransit look for stop_lat and stop_lon.
# So I re-create them using mutate().
gtfs <- gtfs %>% gtfs_as_sf(crs = 4326)
gtfs$stops <- gtfs$stops %>%
ungroup() %>%
mutate(stop_lat = st_coordinates(.)[,2],
stop_lon = st_coordinates(.)[,1])
# Get stop_id for rails and buses
rail_stops <- gtfs$routes %>%
filter(route_type %in% c(1)) %>%
inner_join(gtfs$trips, by = "route_id") %>%
inner_join(gtfs$stop_times, by = "trip_id") %>%
inner_join(gtfs$stops, by = "stop_id") %>%
group_by(stop_id) %>%
slice(1) %>%
pull(stop_id)
# Extract MARTA rail stations
station <- gtfs$stops %>% filter(stop_id %in% rail_stops)
# Extract Midtown Station
midtown <- gtfs$stops %>% filter(stop_id == "134")
# Create a bounding box to which we limit our analysis
bbox <- st_bbox(c(xmin = -84.45241, ymin = 33.72109, xmax = -84.35009, ymax = 33.80101),
crs = st_crs(4326)) %>%
st_as_sfc()
# =========== NO MODIFY ZONE ENDS HERE ========================================
# TASK ////////////////////////////////////////////////////////////////////////
# Specify Census API key whichever you prefer using census_api_key() function
census_api_key(Sys.getenv("CENSUS_API"))
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Using get_acs() function, download Census Tract level data for 2022 for Fulton, DeKalb, and Clayton in GA.
# and assign it to `census` object.
# Make sure you set geometry = TRUE.
# Required data from the Census ACS:
# 1) Median Household Income (name the column `hhinc`)
# 2) Minority Population (%) (name the column `pct_minority`)
# Note: You may need to download two or more Census ACS variables to calculate minority population (%). "Minority" here can refer to either racial minorities or racial+ethnic minorities -- it's your choice.
census <- get_acs(geography = "tract",
state = "GA",
county = c("Fulton", "DeKalb", "Clayton"),
variables = c(hhinc = 'B19013_001',
totalpop = 'B02001_001',
whitepop = 'B02001_002'),
year = 2022,
survey = "acs5",
geometry = TRUE, #sf object
output = "wide"
)%>%
select(GEOID, NAME, hhincE, totalpopE, whitepopE) %>%
mutate(hhinc = hhincE,
pct_minority = (1-(whitepopE/totalpopE))*100) %>%
select(GEOID, NAME, hhinc, pct_minority)
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 63% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Convert the CRS of `census` to GCS
census <- census %>%
st_transform(crs = 4326)
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# get centroids of `census` polygons,
# extract centroids that fall inside the `bbox`,
# and assign it to `home` object.
home <- census %>%
st_centroid() %>%
st_intersection(bbox)
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# 1. Get OSM road data for all road types for the `bbox` area.
# 3. Convert the OSM data into an sf object
# 4. Convert osmdata polygons into lines
osm_road <- opq(bbox = bbox) %>%
add_osm_feature(key = 'highway',
value = c("motorway", "motorway_link",
"trunk", "trunk_link",
"primary", "primary_link",
"secondary", "secondary_link",
"tertiary", "residential")) %>%
osmdata_sf() %>%
osm_poly2line()
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# 1. Convert osm_road$osm_line into sfnetwork
# 2. Activate edges
# 3. Clean the network using the methods we learned
# 4. Assign the cleaned network to an object named 'osm'
osm <- osm_road$osm_line %>%
#select(osm_id, highway) %>%
select(geometry) %>%
sfnetworks::as_sfnetwork(directed=FALSE) %>%
#Activate edges
activate("edges") %>%
filter(!edge_is_multiple()) %>% #remove multiple edges
filter(!edge_is_loop()) %>% #remove loops
#Subdivide edges
convert(sfnetworks::to_spatial_subdivision) %>%
#Delete pseudo nodes
convert(sfnetworks::to_spatial_smooth)
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Add a new column named 'length' to the edges part of the object `osm`.
osm <- osm %>%
activate("edges") %>%
mutate(length = edge_length())
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Extract the first row from `home` object and store it `home_1`
home_1 <- home[1,]
# =========== NO MODIFY ZONE ENDS HERE ========================================
# TASK ////////////////////////////////////////////////////////////////////////
# Find the shortest paths from `home_1` to all other stations
paths <- st_network_paths(osm, from = home_1, to = station, type = "shortest")
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Get shortest network distances from `home_1` to all other stations.
dist_all <- map_dbl(1:nrow(paths), function(x){
osm %>%
activate("nodes") %>%
slice(paths$node_paths[[x]]) %>%
st_as_sf("edges") %>%
pull(length) %>%
sum()
}) %>% unlist()
# Replace zeros with a large value.
if (any(dist_all == 0)){
dist_all[dist_all == 0] <- max(dist_all)
}
# TASK ////////////////////////////////////////////////////////////////////////
# Find the closest station and assign the value to `closest_station`
closest_station <- station[which.min(dist_all), ]
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Get the distance to the closest station.
closest_dist <- dist_all[which.min(dist_all)]
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Calculate travel time based on the `closest_dist` assuming we drive at 20 miles/hour speed.
# Assign the value to `trvt_osm_m` object.
speed_meterph <- 20 * 1609.34
trvt_osm_m <- (closest_dist / speed_meterph) * 60 #meter/minute
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Create a subset of stop_times data table for date = 2025-11-10,
# minimum departure time of 7AM, maximum departure time of 10AM.
# Assign the output to `am_stop_time` object
am_stop_time <- filter_stop_times(gtfs_obj = gtfs,
extract_date = "2025-11-10",
min_departure_time = 3600*7,
max_arrival_time = 3600*10)
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# 1. Calculate travel times from the `closest_station` to all other stations
# during time specified in `am_stop_time`. Allow ONE transfer.
# 2. Filter the row where the destination is Midtown station.
# 3. Assign it to `trvt` object.
trvt <- travel_times(filtered_stop_times = am_stop_time,
stop_name = closest_station$stop_name,
time_range = 3600,
arrival = FALSE, # if TRUE, all journeys end at Midtown station.
max_transfers = 1,
return_coords = TRUE) %>%
filter(to_stop_name == "MIDTOWN STATION")
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Divide the calculated travel time by 60 to convert the unit from seconds to minutes.
trvt_gtfs_m <- trvt$travel_time/60
# Add the travel time from home to the nearest station and
# the travel time from the nearest station to Midtown station
total_trvt <- trvt_osm_m + trvt_gtfs_m
# =========== NO MODIFY ZONE ENDS HERE ========================================
# Function definition
get_trvt <- function(home, osm, station, midtown){
# TASK ////////////////////////////////////////
# If the code in Step 4 runs fine,
# Replace where it says **YOUR CODE HERE..** below with
# the ENTIRETY of the previous code chunk (i.e., Step 4)
home_1 <- home[1,]
# Find the shortest paths from `home_1` to all other stations
paths <- st_network_paths(osm, from = home_1, to = station, type = "shortest")
# Get shortest network distances from `home_1` to all other stations.
dist_all <- map_dbl(1:nrow(paths), function(x){
osm %>%
activate("nodes") %>%
slice(paths$node_paths[[x]]) %>%
st_as_sf("edges") %>%
pull(length) %>%
sum()
}) %>% unlist()
# Replace zeros with a large value.
if (any(dist_all == 0)){
dist_all[dist_all == 0] <- max(dist_all)
}
# Find the closest station and assign the value to `closest_station`
closest_station <- station[which.min(dist_all), ]
# Get the distance to the closest station.
closest_dist <- dist_all[which.min(dist_all)]
# Calculate travel time based on the `closest_dist` assuming we drive at 20 miles/hour speed.
# Assign the value to `trvt_osm_m` object.
speed_meterph <- 20 * 1609.34
trvt_osm_m <- (closest_dist / speed_meterph) * 60 #meter/minute
# Create a subset of stop_times data table for date = 2025-11-10,
# minimum departure time of 7AM, maximum departure time of 10AM.
# Assign the output to `am_stop_time` object
am_stop_time <- filter_stop_times(gtfs_obj = gtfs,
extract_date = "2025-11-10",
min_departure_time = 3600*7,
max_arrival_time = 3600*10)
# 1. Calculate travel times from the `closest_station` to all other stations
# during time specified in `am_stop_time`. Allow ONE transfer.
# 2. Filter the row where the destination is Midtown station.
# 3. Assign it to `trvt` object.
trvt <- travel_times(filtered_stop_times = am_stop_time,
stop_name = closest_station$stop_name,
time_range = 3600,
arrival = FALSE, # if TRUE, all journeys end at Midtown station.
max_transfers = 1,
return_coords = TRUE) %>%
filter(to_stop_name == "MIDTOWN STATION")
# Divide the calculated travel time by 60 to convert the unit from seconds to minutes.
trvt_gtfs_m <- trvt$travel_time/60
# Add the travel time from home to the nearest station and
# the travel time from the nearest station to Midtown station
total_trvt <- trvt_osm_m + trvt_gtfs_m
# //TASK //////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
if (length(total_trvt) == 0) {total_trvt = 0}
return(total_trvt)
# =========== NO MODIFY ZONE ENDS HERE ========================================
}
# Prepare an empty vector
total_trvt <- vector("numeric", nrow(home))
# Apply the function to all Census Tracts
# Fill `total_trvt` object with the calculated time
for (i in 1:nrow(home)){
total_trvt[i] <- get_trvt(home[i,], osm, station, midtown)
}
# cbind the calculated travel time to `home`
home <- home %>%
cbind(trvt = total_trvt)
# TASK ////////////////////////////////////////
# Create an interactive map displaying `census` (polygons) and `home` (points), effectively visualizing household income and travel time to Midtown Station, respectively.
#Define color palettes
palette_hhinc <- colorNumeric("GnBu", domain = census$hhinc, na.color = "transparent")
palette_trvt <- colorNumeric("Reds", domain = home$trvt, na.color = "transparent")
#map on leaflet
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = census,
fillColor = ~palette_hhinc(hhinc),
fillOpacity = 0.8,
color = "#616161",
weight = 1,
popup = ~paste0("<b>Census Tract: </b>", NAME, "<br>",
"<b>Median Income: $</b>", round(hhinc))
) %>%
addCircleMarkers(
data = home,
radius = 4.5,
color = ~palette_trvt(trvt),
stroke = TRUE,
fillOpacity = 0.9,
popup = ~paste0("<b>Travel Time: </b>", round(trvt, 1), " minute(s)")
) %>%
addLegend("bottomleft", pal = palette_hhinc, values = census$hhinc,
title = "Median Household Income", opacity = 0.8) %>%
addLegend("bottomright", pal = palette_trvt, values = home$trvt,
title = "Travel Time (minutes)", opacity = 0.9)
# //TASK //////////////////////////////////////
# TASK ////////////////////////////////////////
# Create an interactive map displaying `census` (polygons) and `home` (points) effectively visualizing the percentage of minority population and travel time to Midtown Station, respectively.
palette_minority <- colorNumeric("Purples", domain = census$pct_minority, na.color = "transparent")
#map on leaflet
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = census,
fillColor = ~palette_minority(pct_minority),
fillOpacity = 0.6,
color = "#616161",
weight = 1,
popup = ~paste0("<b>Census Tract: </b>", NAME, "<br>",
"<b>Percentage of Minority Population: </b>", round(pct_minority, 1), "%")
) %>%
addCircleMarkers(
data = home,
radius = 4.5,
color = ~palette_trvt(trvt),
stroke = TRUE,
fillOpacity = 0.9,
popup = ~paste0("<b>Travel Time: </b>", round(trvt, 1), " minute(s)")
) %>%
addLegend("bottomleft", pal = palette_minority, values = census$pct_minority,
title = "% of Minority Population", opacity = 0.6) %>%
addLegend("bottomright", pal = palette_trvt, values = home$trvt,
title = "Travel Time (minutes)", opacity = 0.9)
# //TASK //////////////////////////////////////
# TASK ////////////////////////////////////////
# Create a scatter plot with a trend line showing the relationship between household income (x-axis) and travel time to Midtown Station (y-axis).
ggplot(home, aes(x = hhinc, y = trvt)) +
geom_point(size = 3.5, color = "#edbe66") +
geom_smooth(method = "lm", linewidth = 1.2, color = "#b85656", se = FALSE) +
labs(
title = "Scatterplot of Median Household Income & Travel Time to MARTA \nMidtown Station",
x = "Median Household Income ($)",
y = "Travel Time (minutes)"
)
# //TASK //////////////////////////////////////
# TASK ////////////////////////////////////////
# Create a scatter plot with a trend line showing the relationship between the percentage of minority population (x-axis) and travel time to Midtown Station (y-axis).
ggplot(home, aes(x = pct_minority, y = trvt)) +
geom_point(size = 3.5, color = "#c291db") +
geom_smooth(method = "lm", linewidth = 1.2, color = "#5537ad", se = FALSE) +
labs(
title = "Scatterplot of Percentage of Minority Population & Travel Time to \nMARTA Midtown Station",
x = "Percentage of Minority Population (%)",
y = "Travel Time (minutes)"
)
# //TASK //////////////////////////////////////
The interactive map of median household income and travel time shows that, spatially, there isn’t a clear relationship between income and travel time. Census tracts north of Midtown have both higher median household income and short travel time to MARTA Midtown Station, but tracts south of Midtown have lower median household income and also shorter travel times. The scatterplot strips away the spatial aspect and purely looks at the relationship between income and travel time. The trend line shows that there is a slight positive relationship between income and travel time, indicating that increasing income also correlates to increasing travel time.
The interactive map of percentage of minority population and travel shows that, spatially, many of the census tracts southwest of Midtown have both high percentage of minority population and travel time. Looking at the scatterplot reveals that there is a very weak to none correlation between minority population and travel time.