library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tmap)
## Warning: package 'tmap' was built under R version 4.3.3
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(units)
## udunits database from C:/Users/Hina/AppData/Local/R/win-library/4.3/units/share/udunits/udunits2.xml
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.3.3
library(dbscan)
## Warning: package 'dbscan' was built under R version 4.3.3
##
## Attaching package: 'dbscan'
##
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(sfnetworks)
## Warning: package 'sfnetworks' was built under R version 4.3.3
library(tigris)
## Warning: package 'tigris' was built under R version 4.3.3
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(tidygraph)
## Warning: package 'tidygraph' was built under R version 4.3.3
##
## Attaching package: 'tidygraph'
##
## The following object is masked from 'package:stats':
##
## filter
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(osmdata)
## Warning: package 'osmdata' was built under R version 4.3.3
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(here)
## Warning: package 'here' was built under R version 4.3.3
## here() starts at C:/Users/Hina/Downloads
library(tidytransit)
## Warning: package 'tidytransit' was built under R version 4.3.3
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.3.3
library(leafsync)
## Warning: package 'leafsync' was built under R version 4.3.3
epsg <- 4326
# TASK ////////////////////////////////////////////////////////////////////////
# Download MARTA (Metropolitan Atlanta Rapid Transit Authority) GTFS data using `read_gtfs()` function and assign it to `gtfs` object
gtfs <- read_gtfs("https://www.itsmarta.com/google_transit_feed/google_transit.zip")
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Edit stop_name to append serial numbers (1, 2, etc.) to remove duplicate names
stop_dist <- stop_group_distances(gtfs$stops, by='stop_name') %>%
filter(dist_max > 200)
gtfs$stops <- gtfs$stops %>%
group_by(stop_name) %>%
mutate(stop_name = case_when(stop_name %in% stop_dist$stop_name ~ paste0(stop_name, " (", seq(1,n()), ")"),
TRUE ~ stop_name))
# Create a transfer table
gtfs <- gtfsrouter::gtfs_transfer_table(gtfs,
d_limit = 200,
min_transfer_time = 120)
## Registered S3 method overwritten by 'gtfsrouter':
## method from
## summary.gtfs gtfsio
# NOTE: Converting to sf format uses stop_lat and stop_lon columns contained in gtfs$stops.
# In the conversion process, stop_lat and stop_lon are converted into a geometry column, and
# the output sf object do not have the lat lon column anymore.
# But many other functions in tidytransit look for stop_lat and stop_lon.
# So I re-create them using mutate().
gtfs <- gtfs %>% gtfs_as_sf(crs = epsg)
gtfs$stops <- gtfs$stops %>%
ungroup() %>%
mutate(stop_lat = st_coordinates(.)[,2],
stop_lon = st_coordinates(.)[,1])
# Get stop_id for rails and buses
rail_stops <- gtfs$routes %>%
filter(route_type %in% c(1)) %>%
inner_join(gtfs$trips, by = "route_id") %>%
inner_join(gtfs$stop_times, by = "trip_id") %>%
inner_join(gtfs$stops, by = "stop_id") %>%
group_by(stop_id) %>%
slice(1) %>%
pull(stop_id)
# Extract MARTA rail stations
station <- gtfs$stops %>% filter(stop_id %in% rail_stops)
# Extract Midtown Station
midtown <- gtfs$stops %>% filter(stop_id == "134")
# Create a bounding box to which we limit our analysis
bbox <- st_bbox(c(xmin = -84.45241, ymin = 33.72109, xmax = -84.35009, ymax = 33.80101),
crs = st_crs(4326)) %>%
st_as_sfc()
# =========== NO MODIFY ZONE ENDS HERE ========================================
# TASK ////////////////////////////////////////////////////////////////////////
# Specify Census API key whichever you prefer using census_api_key() function
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Using get_acs() function, download Census Tract level data for 2022 for Fulton, DeKalb, and Clayton in GA.
# and assign it to `census` object.
# Make sure you set geometry = TRUE.
# Required data from the Census ACS:
# 1) Median Household Income (name the column `hhinc`)
# 2) Minority Population (%) (name the column `pct_minority`)
# Note: You may need to download two or more Census ACS variables to calculate minority population (%). "Minority" here can refer to either racial minorities or racial+ethnic minorities -- it's your choice.
census <- get_acs(
geography = "tract",
state = "GA",
county = c("Fulton","Dekalb","Clayton"),
variables = c(hhinc = "B19019_001",
white_nh = "B03002_003",
total = "B01003_001"),
year = 2022,
survey = "acs5",
geometry = TRUE,
output = "wide") %>%
select(GEOID, NAME, hhinc = hhincE, white_nh = white_nhE, total = totalE) %>%
mutate(pct_minority = (1 - (white_nh/total))*100)
## Getting data from the 2018-2022 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## | | | 0% | | | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 14% | |========== | 14% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============== | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 71% | |================================================== | 71% | |=================================================== | 73% | |==================================================== | 74% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |=================================================================== | 95% | |=================================================================== | 96% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
census <- census %>%
st_transform(crs = 4326) %>%
separate(col = NAME, into = c("tract", "county", "state"), sep = "; ")
# Convert it to POINT at polygon centroids and extract those that fall into bbox
# and assign it into `home` object
home <- census %>% st_centroid() %>% .[bbox,]
## Warning: st_centroid assumes attributes are constant over geometries
# =========== NO MODIFY ZONE ENDS HERE ========================================
# TASK ////////////////////////////////////////////////////////////////////////
# 1. Get OSM data using opq() function and bbox object defined in the previous code chunk.
# 2. Specify arguments for add_osm_feature() function using
# key = 'highway' and
# value = c("motorway", "trunk", "primary", "secondary", "tertiary", "residential",
# "motorway_link", "trunk_link", "primary_link", "secondary_link",
# "tertiary_link", "residential_link", "unclassified")
# 3. Convert the OSM data into an sf object using osmdata_sf() function
# 4. Convert osmdata polygons into lines using osm_poly2line() function
osm_road <- opq(bbox = bbox) %>%
add_osm_feature(key = 'highway',
value = c("motorway", "trunk", "primary", "secondary", "tertiary", "residential", "motorway_link", "trunk_link", "primary_link", "secondary_link", "tertiary_link", "residential_link", "unclassified")) %>%
osmdata_sf() %>%
osm_poly2line()
tmap_mode('plot')
## tmap mode set to plotting
tm_shape(osm_road$osm_lines) + tm_lines(col = "highway")
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# 1. Convert osm_road$osm_lines into sfnetwork using as_sfnetwork() function
# 2. Activate edges
# 3. Clean the network using edge_is_multiple(), edge_is_loop(), to_spatial_subdivision(), to_spatial_smooth()
# 4. Assign the cleaned network to an object named 'osm'
osm <- osm_road$osm_lines %>%
select(osm_id, highway)
osm <- sfnetworks::as_sfnetwork(osm, directed = FALSE)
print(osm)
## # A sfnetwork with 6081 nodes and 5404 edges
## #
## # CRS: EPSG:4326
## #
## # An undirected multigraph with 1251 components with spatially explicit edges
## #
## # A tibble: 6,081 × 1
## geometry
## <POINT [°]>
## 1 (-84.34923 33.74581)
## 2 (-84.35038 33.7454)
## 3 (-84.35015 33.74516)
## 4 (-84.34924 33.745)
## 5 (-84.37242 33.74325)
## 6 (-84.36738 33.74324)
## # ℹ 6,075 more rows
## #
## # A tibble: 5,404 × 5
## from to osm_id highway geometry
## <int> <int> <chr> <chr> <LINESTRING [°]>
## 1 1 2 9164335 motorway_link (-84.34923 33.74581, -84.3491 33.74624, -84…
## 2 3 4 9165104 motorway_link (-84.35015 33.74516, -84.34948 33.74517, -8…
## 3 5 6 9186247 motorway (-84.37242 33.74325, -84.37048 33.74326, -8…
## # ℹ 5,401 more rows
print(paste0('Which one is active?: ', sfnetworks::active(osm)))
## [1] "Which one is active?: nodes"
osm2 <- osm %>%
activate("edges") %>%
filter(!edge_is_multiple()) %>%
filter(!edge_is_loop())
# print out the differences in the edge count.
message(str_c("Before simplification, there were ", osm %>% st_as_sf("edges") %>% nrow(), " edges. \n",
"After simplification, there are ", osm2 %>% st_as_sf("edges") %>% nrow(), " edges."))
## Before simplification, there were 5404 edges.
## After simplification, there are 5360 edges.
# Using spatial morpher to subdivide
subdiv_osm2 <- convert(osm2, sfnetworks::to_spatial_subdivision)
## Warning: to_spatial_subdivision assumes attributes are constant over geometries
# Using spatial morpher to smooth
smoothed_osm2 <- convert(subdiv_osm2, sfnetworks::to_spatial_smooth)
osm <- smoothed_osm2
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Add a new column named 'length' to the edges part of the object `osm`.
osm <- osm %>%
st_transform(4326) %>% activate("edges") %>% mutate(length = edge_length())
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Extract the first row from `home` object and store it `home_1`
home_1 <- home[1,]
# =========== NO MODIFY ZONE ENDS HERE ========================================
# TASK ////////////////////////////////////////////////////////////////////////
# Find the shortest path from `home_1` to all other stations
# using st_network_paths() function.
paths <- st_network_paths(osm, from = home_1, to = station, type = "shortest")
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Using the `paths` object, get network distances from `home_1` to all other stations.
dist_all <- map_dbl(1:nrow(paths), function(x){
osm %>%
activate("nodes") %>%
slice(paths$node_paths[[x]]) %>%
st_as_sf("edges") %>%
pull(length) %>%
sum()
}) %>% unlist()
# Replace zeros with a large value.
if (any(dist_all == 0)){
dist_all[dist_all == 0] <- max(dist_all)
}
# Find the closest station.
closest_index <- which.min(dist_all)
closest_station <- station[closest_index,]
# Find the distance to the closest station.
closest_dist <- min(dist_all)
# Calculate how long it takes to traverse `closest_dist`
# assuming we drive at 30 miles/hour speed.
# Store the output in trvt_osm_m.
car_speed <- set_units(30, mile/h)
trvt_osm_m <- closest_dist/set_units(car_speed, m/min) %>% # Distance divided by 30 mile/h
as.vector(.)
# =========== NO MODIFY ZONE ENDS HERE ========================================
# TASK ////////////////////////////////////////////////////////////////////////
# 1. From `osm` object, activate nodes part and
# 2. use `closest_index` to extract the selected path
paths_closest <- osm %>%
activate("nodes") %>%
slice(paths$node_paths[[closest_index]])
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# Use filter_stop_times() function to create a subset of stop_times data table
# for date = 2024-11-14, minimum departure time of 7AM, maximum departure time of 10AM.
# Assign the output to `am_stop_time` object
am_stop_time <- filter_stop_times(gtfs_obj = gtfs,
extract_date = "2024-11-14",
min_departure_time = 3600*7,
max_arrival_time = 3600*10)
# //TASK //////////////////////////////////////////////////////////////////////
# TASK ////////////////////////////////////////////////////////////////////////
# 1. Use travel_times() function to calculate travel times from the `closest_station`
# to all other stations during time specified in am_stop_time. Allow ONE transfer.
# 2. Filter the row for which the value of 'to_stop_name' column
# equals midtown$stop_name. Assign it into `trvt` object.
trvt <- travel_times(filtered_stop_times = am_stop_time,
stop_name = closest_station,
time_range = 3600,
arrival = FALSE,
max_transfers = 1,
return_coords = TRUE) %>%
filter(to_stop_name == "MIDTOWN STATION")
# //TASK //////////////////////////////////////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Divide the calculated travel time by 60 to convert the unit from seconds to minutes.
trvt_gtfs_m <- trvt$travel_time/60
# Add the travel time from home to the nearest station and
# the travel time from the nearest station to Midtown station
total_trvt <- trvt_osm_m + trvt_gtfs_m
# =========== NO MODIFY ZONE ENDS HERE ========================================
# Function definition (do not modify other parts of the code in this code chunk except for those inside the TASK section)
get_trvt <- function(home, osm, station, midtown){
# TASK ////////////////////////////////////////
# If the code in Step 4 runs fine,
# Replace where it says **YOUR CODE HERE..** below with
# the ENTIRETY of the code in the previous code chunk (i.e., Step 4)
# Extract the first row from `home` object and store it `home_1`
home_1 <- home[1,]
paths <- st_network_paths(osm, from = home_1, to = station, type = "shortest")
dist_all <- map_dbl(1:nrow(paths), function(x){
osm %>%
activate("nodes") %>%
slice(paths$node_paths[[x]]) %>%
st_as_sf("edges") %>%
pull(length) %>%
sum()
}) %>% unlist()
# Replace zeros with a large value.
if (any(dist_all == 0)){
dist_all[dist_all == 0] <- max(dist_all)
}
# Find the closest station.
closest_index <- which.min(dist_all)
closest_station <- station[closest_index,]
# Find the distance to the closest station.
closest_dist <- min(dist_all)
# Calculate how long it takes to traverse `closest_dist`
# assuming we drive at 30 miles/hour speed.
# Store the output in trvt_osm_m.
car_speed <- set_units(30, mile/h)
trvt_osm_m <- closest_dist/set_units(car_speed, m/min) %>% # Distance divided by 30 mile/h
as.vector(.)
paths_closest <- osm %>%
activate("nodes") %>%
slice(paths$node_paths[[closest_index]])
am_stop_time <- filter_stop_times(gtfs_obj = gtfs,
extract_date = "2024-11-14",
min_departure_time = 3600*7,
max_arrival_time = 3600*10)
trvt <- travel_times(filtered_stop_times = am_stop_time,
stop_name = closest_station,
time_range = 3600,
arrival = FALSE,
max_transfers = 1,
return_coords = TRUE) %>%
filter(to_stop_name == "MIDTOWN STATION")
trvt_gtfs_m <- trvt$travel_time/60
total_trvt <- trvt_osm_m + trvt_gtfs_m
# //TASK //////////////////////////////////////
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
if (length(total_trvt) == 0) {total_trvt = 0}
return(total_trvt)
# =========== NO MODIFY ZONE ENDS HERE ========================================
}
# Prepare an empty vector
total_trvt <- vector("numeric", nrow(home))
# Apply the function for all Census Tracts
# Fill `total_trvt` object with the calculated time
for (i in 1:nrow(home)){
total_trvt[i] <- get_trvt(home[i,], osm, station, midtown)
}
# Cbind the calculated travel time back to `home`
home_done <- home %>%
cbind(trvt = total_trvt)
# Map
tmap_mode('view')
## tmap mode set to interactive viewing
tm_shape(census[census$GEOID %in% home$GEOID,]) +
tm_polygons(col = "hhinc", palette = 'GnBu') +
tm_shape(home_done) +
tm_dots(col = "trvt", palette = 'Reds', size = 0.1)
tm_shape(census[census$GEOID %in% home$GEOID,]) +
tm_polygons(col = "pct_minority", palette = 'GnBu') +
tm_shape(home_done) +
tm_dots(col = "trvt", palette = 'Reds', size = 0.1)
# ggplot
inc <- ggplot(data = home_done,
aes(x = hhinc, y = trvt)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Median Annual Household Income",
y = "Park-and-ride Travel Time from Home to Midtown Station") +
theme_bw()
minority <- ggplot(data = home_done,
aes(x = pct_minority, y = trvt)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Minority Population (%)",
y = "Park-and-ride Travel Time from Home to Midtown Station") +
theme_bw()
ggpubr::ggarrange(inc, minority)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
#From the maps, the correlation between household income and travel time, as well as minority population and travel time, is difficult to observe across census tracts.
#The plot comparing travel times across census tracts by median annual household income has a positive slope. This suggests that as household income increases, so does travel time. The second plot shows travel time relative to minority population; the somewhat flat slope indicates that travel times are similar across tracts regardless of minority population.