library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'ggplot2' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tmap)

## Warning: package 'tmap' was built under R version 4.3.3

## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')

library(units)

## udunits database from C:/Users/Hina/AppData/Local/R/win-library/4.3/units/share/udunits/udunits2.xml

library(sf)

## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE

library(leaflet)

## Warning: package 'leaflet' was built under R version 4.3.3

library(dbscan)

## Warning: package 'dbscan' was built under R version 4.3.3

## 
## Attaching package: 'dbscan'
## 
## The following object is masked from 'package:stats':
## 
##     as.dendrogram

library(sfnetworks)

## Warning: package 'sfnetworks' was built under R version 4.3.3

library(tigris)

## Warning: package 'tigris' was built under R version 4.3.3

## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.

library(tidygraph)

## Warning: package 'tidygraph' was built under R version 4.3.3

## 
## Attaching package: 'tidygraph'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(plotly)

## Warning: package 'plotly' was built under R version 4.3.3

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(osmdata)

## Warning: package 'osmdata' was built under R version 4.3.3

## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright

library(here)

## Warning: package 'here' was built under R version 4.3.3

## here() starts at C:/Users/Hina/Downloads

library(tidytransit)

## Warning: package 'tidytransit' was built under R version 4.3.3

library(tidycensus)

## Warning: package 'tidycensus' was built under R version 4.3.3

library(leafsync)

## Warning: package 'leafsync' was built under R version 4.3.3

epsg <- 4326

Step 1. Download Required data from GTFS.

# TASK ////////////////////////////////////////////////////////////////////////
# Download MARTA (Metropolitan Atlanta Rapid Transit Authority) GTFS data using `read_gtfs()` function and assign it to `gtfs` object
gtfs <- read_gtfs("https://www.itsmarta.com/google_transit_feed/google_transit.zip")
# //TASK //////////////////////////////////////////////////////////////////////



# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Edit stop_name to append serial numbers (1, 2, etc.) to remove duplicate names
stop_dist <- stop_group_distances(gtfs$stops, by='stop_name') %>%
  filter(dist_max > 200)

gtfs$stops <- gtfs$stops %>% 
  group_by(stop_name) %>% 
  mutate(stop_name = case_when(stop_name %in% stop_dist$stop_name ~ paste0(stop_name, " (", seq(1,n()), ")"),
                               TRUE ~ stop_name))

# Create a transfer table
gtfs <- gtfsrouter::gtfs_transfer_table(gtfs, 
                                        d_limit = 200, 
                                        min_transfer_time = 120)

## Registered S3 method overwritten by 'gtfsrouter':
##   method       from  
##   summary.gtfs gtfsio

# NOTE: Converting to sf format uses stop_lat and stop_lon columns contained in gtfs$stops.
#       In the conversion process, stop_lat and stop_lon are converted into a geometry column, and
#       the output sf object do not have the lat lon column anymore.
#       But many other functions in tidytransit look for stop_lat and stop_lon.
#       So I re-create them using mutate().
gtfs <- gtfs %>% gtfs_as_sf(crs = epsg)

gtfs$stops <- gtfs$stops %>% 
  ungroup() %>% 
  mutate(stop_lat = st_coordinates(.)[,2],
         stop_lon = st_coordinates(.)[,1]) 

# Get stop_id for rails and buses
rail_stops <- gtfs$routes %>% 
  filter(route_type %in% c(1)) %>% 
  inner_join(gtfs$trips, by = "route_id") %>% 
  inner_join(gtfs$stop_times, by = "trip_id") %>% 
  inner_join(gtfs$stops, by = "stop_id") %>% 
  group_by(stop_id) %>% 
  slice(1) %>% 
  pull(stop_id)

# Extract MARTA rail stations
station <- gtfs$stops %>% filter(stop_id %in% rail_stops)

# Extract Midtown Station
midtown <- gtfs$stops %>% filter(stop_id == "134")

# Create a bounding box to which we limit our analysis
bbox <- st_bbox(c(xmin = -84.45241, ymin = 33.72109, xmax = -84.35009, ymax = 33.80101), 
                 crs = st_crs(4326)) %>% 
  st_as_sfc()

# =========== NO MODIFY ZONE ENDS HERE ========================================

Step 2. Download Required data from Census

# TASK ////////////////////////////////////////////////////////////////////////
# Specify Census API key whichever you prefer using census_api_key() function

# //TASK //////////////////////////////////////////////////////////////////////



# TASK ////////////////////////////////////////////////////////////////////////
# Using get_acs() function, download Census Tract level data for 2022 for Fulton, DeKalb, and Clayton in GA.
# and assign it to `census` object.
# Make sure you set geometry = TRUE.

# Required data from the Census ACS:
#  1) Median Household Income (name the column `hhinc`)
#  2) Minority Population (%) (name the column `pct_minority`)
# Note: You may need to download two or more Census ACS variables to calculate minority population (%). "Minority" here can refer to either racial minorities or racial+ethnic minorities -- it's your choice.

census <- get_acs(
          geography = "tract",
          state = "GA",
          county = c("Fulton","Dekalb","Clayton"),
          variables = c(hhinc = "B19019_001",
                        white_nh = "B03002_003",
                         total = "B01003_001"),
          year = 2022,
          survey = "acs5", 
          geometry = TRUE,
          output = "wide") %>%
  select(GEOID, NAME, hhinc = hhincE, white_nh = white_nhE, total = totalE) %>%
  mutate(pct_minority = (1 - (white_nh/total))*100)

## Getting data from the 2018-2022 5-year ACS

## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

##   |                                                                              |                                                                      |   0%  |                                                                              |                                                                      |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |===                                                                   |   5%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |=====                                                                 |   8%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  11%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  12%  |                                                                              |=========                                                             |  14%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  17%  |                                                                              |============                                                          |  18%  |                                                                              |=============                                                         |  18%  |                                                                              |==============                                                        |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  21%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  23%  |                                                                              |=================                                                     |  24%  |                                                                              |==================                                                    |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |====================                                                  |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |======================                                                |  31%  |                                                                              |======================                                                |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  34%  |                                                                              |========================                                              |  35%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  38%  |                                                                              |===========================                                           |  39%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  41%  |                                                                              |=============================                                         |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  44%  |                                                                              |===============================                                       |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |=================================                                     |  48%  |                                                                              |==================================                                    |  49%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================                                  |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |======================================                                |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |===========================================                           |  61%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  68%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  71%  |                                                                              |==================================================                    |  71%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  74%  |                                                                              |=====================================================                 |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  77%  |                                                                              |=======================================================               |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  80%  |                                                                              |=========================================================             |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |============================================================          |  85%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |=============================================================         |  88%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                                                                              |===============================================================       |  90%  |                                                                              |================================================================      |  91%  |                                                                              |================================================================      |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |===================================================================   |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================| 100%

# //TASK //////////////////////////////////////////////////////////////////////



# =========== NO MODIFICATION ZONE STARTS HERE ===============================
census <- census %>% 
  st_transform(crs = 4326) %>% 
  separate(col = NAME, into = c("tract", "county", "state"), sep = "; ")

# Convert it to POINT at polygon centroids and extract those that fall into bbox
# and assign it into `home` object
home <- census %>% st_centroid() %>% .[bbox,]

## Warning: st_centroid assumes attributes are constant over geometries

# =========== NO MODIFY ZONE ENDS HERE ========================================

Step 3. Download Required data from OSM.

# TASK ////////////////////////////////////////////////////////////////////////
# 1. Get OSM data using opq() function and bbox object defined in the previous code chunk.
# 2. Specify arguments for add_osm_feature() function using 
#    key = 'highway' and 
#    value = c("motorway", "trunk", "primary", "secondary", "tertiary", "residential", 
#              "motorway_link", "trunk_link", "primary_link", "secondary_link", 
#              "tertiary_link", "residential_link", "unclassified")
# 3. Convert the OSM data into an sf object using osmdata_sf() function
# 4. Convert osmdata polygons into lines using osm_poly2line() function

osm_road <- opq(bbox = bbox) %>%
  add_osm_feature(key = 'highway', 
                  value = c("motorway", "trunk", "primary", "secondary", "tertiary", "residential", "motorway_link", "trunk_link", "primary_link", "secondary_link", "tertiary_link", "residential_link", "unclassified")) %>%
  osmdata_sf() %>% 
  osm_poly2line()

tmap_mode('plot')

## tmap mode set to plotting

tm_shape(osm_road$osm_lines) + tm_lines(col = "highway")

# //TASK //////////////////////////////////////////////////////////////////////


# TASK ////////////////////////////////////////////////////////////////////////
# 1. Convert osm_road$osm_lines into sfnetwork using as_sfnetwork() function
# 2. Activate edges
# 3. Clean the network using edge_is_multiple(), edge_is_loop(), to_spatial_subdivision(), to_spatial_smooth()
# 4. Assign the cleaned network to an object named 'osm'

osm <- osm_road$osm_lines %>% 
  select(osm_id, highway)


osm <- sfnetworks::as_sfnetwork(osm, directed = FALSE) 
print(osm)

## # A sfnetwork with 6081 nodes and 5404 edges
## #
## # CRS:  EPSG:4326 
## #
## # An undirected multigraph with 1251 components with spatially explicit edges
## #
## # A tibble: 6,081 × 1
##               geometry
##            <POINT [°]>
## 1 (-84.34923 33.74581)
## 2  (-84.35038 33.7454)
## 3 (-84.35015 33.74516)
## 4   (-84.34924 33.745)
## 5 (-84.37242 33.74325)
## 6 (-84.36738 33.74324)
## # ℹ 6,075 more rows
## #
## # A tibble: 5,404 × 5
##    from    to osm_id  highway                                           geometry
##   <int> <int> <chr>   <chr>                                     <LINESTRING [°]>
## 1     1     2 9164335 motorway_link (-84.34923 33.74581, -84.3491 33.74624, -84…
## 2     3     4 9165104 motorway_link (-84.35015 33.74516, -84.34948 33.74517, -8…
## 3     5     6 9186247 motorway      (-84.37242 33.74325, -84.37048 33.74326, -8…
## # ℹ 5,401 more rows

print(paste0('Which one is active?: ', sfnetworks::active(osm)))

## [1] "Which one is active?: nodes"

osm2 <- osm %>%
  activate("edges") %>%
  filter(!edge_is_multiple()) %>%
  filter(!edge_is_loop()) 

# print out the differences in the edge count.
message(str_c("Before simplification, there were ", osm %>% st_as_sf("edges") %>% nrow(), " edges. \n",
            "After simplification, there are ", osm2 %>% st_as_sf("edges") %>% nrow(), " edges."))

## Before simplification, there were 5404 edges. 
## After simplification, there are 5360 edges.

# Using spatial morpher to subdivide
subdiv_osm2 <- convert(osm2, sfnetworks::to_spatial_subdivision)

## Warning: to_spatial_subdivision assumes attributes are constant over geometries

# Using spatial morpher to smooth
smoothed_osm2 <- convert(subdiv_osm2, sfnetworks::to_spatial_smooth)

osm <- smoothed_osm2

# //TASK //////////////////////////////////////////////////////////////////////



# TASK ////////////////////////////////////////////////////////////////////////
# Add a new column named 'length' to the edges part of the object `osm`.
osm <- osm %>% 
  st_transform(4326) %>% activate("edges") %>% mutate(length = edge_length())
# //TASK //////////////////////////////////////////////////////////////////////

Step 4. Simulate a park-and-ride trip (home -> closest station -> Midtown station).

# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Extract the first row from `home` object and store it `home_1`
home_1 <- home[1,]
# =========== NO MODIFY ZONE ENDS HERE ========================================


# TASK ////////////////////////////////////////////////////////////////////////
# Find the shortest path from `home_1` to all other stations
# using st_network_paths() function.

paths <- st_network_paths(osm, from = home_1, to = station, type = "shortest")

# //TASK //////////////////////////////////////////////////////////////////////

  
# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Using the `paths` object, get network distances from `home_1` to all other stations.
dist_all <- map_dbl(1:nrow(paths), function(x){
  osm %>%
    activate("nodes") %>% 
    slice(paths$node_paths[[x]]) %>% 
    st_as_sf("edges") %>% 
    pull(length) %>% 
    sum()
}) %>% unlist() 

# Replace zeros with a large value.
if (any(dist_all == 0)){
  dist_all[dist_all == 0] <- max(dist_all)
}

# Find the closest station.
closest_index <- which.min(dist_all)
closest_station <- station[closest_index,]

# Find the distance to the closest station.
closest_dist <- min(dist_all)

# Calculate how long it takes to traverse `closest_dist` 
# assuming we drive at 30 miles/hour speed.
# Store the output in trvt_osm_m.
car_speed <- set_units(30, mile/h)
trvt_osm_m <- closest_dist/set_units(car_speed, m/min) %>%  # Distance divided by 30 mile/h
  as.vector(.)
# =========== NO MODIFY ZONE ENDS HERE ========================================


# TASK ////////////////////////////////////////////////////////////////////////
# 1. From `osm` object, activate nodes part and
# 2. use `closest_index` to extract the selected path
paths_closest <- osm %>%
  activate("nodes") %>% 
  slice(paths$node_paths[[closest_index]])
# //TASK //////////////////////////////////////////////////////////////////////


# TASK ////////////////////////////////////////////////////////////////////////
# Use filter_stop_times() function to create a subset of stop_times data table
# for date = 2024-11-14, minimum departure time of 7AM, maximum departure time of 10AM.
# Assign the output to `am_stop_time` object
am_stop_time <- filter_stop_times(gtfs_obj = gtfs, 
                                  extract_date = "2024-11-14",
                                  min_departure_time = 3600*7, 
                                  max_arrival_time = 3600*10) 
# //TASK //////////////////////////////////////////////////////////////////////



# TASK ////////////////////////////////////////////////////////////////////////
# 1. Use travel_times() function to calculate travel times from the `closest_station` 
#    to all other stations during time specified in am_stop_time. Allow ONE transfer.
# 2. Filter the row for which the value of 'to_stop_name' column 
#    equals midtown$stop_name. Assign it into `trvt` object.
trvt <-  travel_times(filtered_stop_times = am_stop_time,
                     stop_name = closest_station,
                     time_range = 3600,
                     arrival = FALSE,
                     max_transfers = 1,
                     return_coords = TRUE) %>%
  filter(to_stop_name == "MIDTOWN STATION")

# //TASK //////////////////////////////////////////////////////////////////////



# =========== NO MODIFICATION ZONE STARTS HERE ===============================
# Divide the calculated travel time by 60 to convert the unit from seconds to minutes.
trvt_gtfs_m <- trvt$travel_time/60

# Add the travel time from home to the nearest station and
# the travel time from the nearest station to Midtown station
total_trvt <- trvt_osm_m + trvt_gtfs_m
# =========== NO MODIFY ZONE ENDS HERE ========================================

Step 5. Convert Step 4 into a function

# Function definition (do not modify other parts of the code in this code chunk except for those inside the TASK section)

get_trvt <- function(home, osm, station, midtown){
  
  # TASK ////////////////////////////////////////
  # If the code in Step 4 runs fine,
  # Replace where it says **YOUR CODE HERE..** below with 
  # the ENTIRETY of the code in the previous code chunk (i.e., Step 4)
  # Extract the first row from `home` object and store it `home_1`
home_1 <- home[1,]
paths <- st_network_paths(osm, from = home_1, to = station, type = "shortest")

dist_all <- map_dbl(1:nrow(paths), function(x){
  osm %>%
    activate("nodes") %>% 
    slice(paths$node_paths[[x]]) %>% 
    st_as_sf("edges") %>% 
    pull(length) %>% 
    sum()
}) %>% unlist() 

# Replace zeros with a large value.
if (any(dist_all == 0)){
  dist_all[dist_all == 0] <- max(dist_all)
}

# Find the closest station.
closest_index <- which.min(dist_all)
closest_station <- station[closest_index,]

# Find the distance to the closest station.
closest_dist <- min(dist_all)

# Calculate how long it takes to traverse `closest_dist` 
# assuming we drive at 30 miles/hour speed.
# Store the output in trvt_osm_m.
car_speed <- set_units(30, mile/h)
trvt_osm_m <- closest_dist/set_units(car_speed, m/min) %>%  # Distance divided by 30 mile/h
  as.vector(.)

paths_closest <- osm %>%
  activate("nodes") %>% 
  slice(paths$node_paths[[closest_index]])

am_stop_time <- filter_stop_times(gtfs_obj = gtfs, 
                                  extract_date = "2024-11-14",
                                  min_departure_time = 3600*7, 
                                  max_arrival_time = 3600*10) 

trvt <-  travel_times(filtered_stop_times = am_stop_time,
                     stop_name = closest_station,
                     time_range = 3600,
                     arrival = FALSE,
                     max_transfers = 1,
                     return_coords = TRUE) %>%
  filter(to_stop_name == "MIDTOWN STATION")

trvt_gtfs_m <- trvt$travel_time/60

total_trvt <- trvt_osm_m + trvt_gtfs_m
  
  # //TASK //////////////////////////////////////

  # =========== NO MODIFICATION ZONE STARTS HERE ===============================
  if (length(total_trvt) == 0) {total_trvt = 0}

  return(total_trvt)
  # =========== NO MODIFY ZONE ENDS HERE ========================================
}

Step 6. Apply the function for the whole study area

# Prepare an empty vector
total_trvt <- vector("numeric", nrow(home))

# Apply the function for all Census Tracts
# Fill `total_trvt` object with the calculated time
for (i in 1:nrow(home)){
  total_trvt[i] <- get_trvt(home[i,], osm, station, midtown)
}

# Cbind the calculated travel time back to `home`
home_done <- home %>% 
  cbind(trvt = total_trvt)

Step 7. Create maps and plots

# Map
tmap_mode('view')

## tmap mode set to interactive viewing

tm_shape(census[census$GEOID %in% home$GEOID,]) + 
  tm_polygons(col = "hhinc", palette = 'GnBu') + 
  tm_shape(home_done) + 
  tm_dots(col = "trvt", palette = 'Reds', size = 0.1)

tm_shape(census[census$GEOID %in% home$GEOID,]) + 
  tm_polygons(col = "pct_minority", palette = 'GnBu') + 
  tm_shape(home_done) + 
  tm_dots(col = "trvt", palette = 'Reds', size = 0.1)

# ggplot
inc <- ggplot(data = home_done,
              aes(x = hhinc, y = trvt)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Median Annual Household Income",
       y = "Park-and-ride Travel Time from Home to Midtown Station") +
  theme_bw()

minority <- ggplot(data = home_done,
                   aes(x = pct_minority, y = trvt)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Minority Population (%)",
       y = "Park-and-ride Travel Time from Home to Midtown Station") +
  theme_bw()


ggpubr::ggarrange(inc, minority)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using formula = 'y ~ x'

#From the maps, the correlation between household income and travel time, as well as minority population and travel time, is difficult to observe across census tracts.

#The plot comparing travel times across census tracts by median annual household income has a positive slope. This suggests that as household income increases, so does travel time. The second plot shows travel time relative to minority population; the somewhat flat slope indicates that travel times are similar across tracts regardless of minority population.

Major_assignment_1

Originally written by Bon Woo Koo & Subhro Guhathakurta; modified by Uijeong Hwang

2024-10-16