Data Wrangling

Congestion Zone

# Load boundary
### This boundary is a custom ArcGIS layer I created to outline the the congestion zone. 
cong_zone <- st_read("./data/CongestionZoneShape.shp")
class(cong_zone)

st_crs(cong_zone)

cong_zone <- st_transform(cong_zone, crs = "EPSG:2263")

cong_zone <- st_make_valid(cong_zone)


# tmap_mode("view")

# tm_shape(cong_zone) +
#   tm_polygons()

New York City Geometry Codes

brough_names <-  c("Bronx","Manhattan","Staten Island","Brooklyn","Queens")
brough_codes <- c("005","061","085","047","081")

city_lu <- as.data.frame(cbind(brough_names,brough_codes))

colnames(city_lu) <- c("CountyName","COUNTYFP")

Get Census Geometry

# Census Tracts - 2019 
tracts_19 <- tracts(state = "NY", year = 2019)

# Project
tracts_19 <- st_transform(tracts_19, crs = "EPSG:2263")

# Limit to NYC
tracts_19_code <- tracts_19 %>%
  select(COUNTYFP, GEOID, geometry)

tracts_19_nyc <- inner_join(tracts_19_code, city_lu, by = "COUNTYFP")

# Test map
# tm_shape(tracts_19_nyc) +
#   tm_polygons(col = "grey40") + 
# tm_shape(cong_zone) +
#   tm_polygons(col = "dodgerblue")


# Break up Manhattan by in or out of zone
zone_tracts <- st_join(cong_zone, tracts_19_nyc, join = st_intersects)
zone_tracts <- zone_tracts %>%
  st_set_geometry(NULL) %>%
  select(GEOID)

zone_tracts$Borough <- c("Manhattan - Zone")

# class(test)

zone_tracts2 <- left_join(tracts_19_nyc, zone_tracts, by = "GEOID")

colnames(zone_tracts2) <- c("COUNTYFP","GEOID","County","Borough","geometry")

zone_tracts2$BoroughFinal <- case_when(is.na(zone_tracts2$Borough) == TRUE ~ zone_tracts2$County,
                                       zone_tracts2$GEOID == "36061014300" ~ zone_tracts2$County,
                                       is.na(zone_tracts2$Borough) == FALSE ~ zone_tracts2$Borough)

zone_tracts2 <- zone_tracts2[,c(2,3,6,4)]
tracts_19_nyc <- zone_tracts2[,-c(4)]

colnames(tracts_19_nyc) <- c("GEOID","Borough","BoroughFinal","geometry")

tracts_19_nyc %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal) %>%
  summarize(n = n_distinct(GEOID)) %>%
  arrange(desc(n))


test_zone <- tracts_19_nyc %>%
  filter(BoroughFinal == "Manhattan - Zone")

# Test map
#tmap_mode(("view"))

#tm_shape(test_zone) +
# tm_polygons(col = "grey40") + 
#tm_shape(cong_zone) +
# tm_polygons(col = "dodgerblue")

# Clear intermediate tables
remove(zone_tracts)
remove(zone_tracts2)
remove(tracts_19_code)
remove(tracts_19)
remove(city_lu)
remove(test_zone)

Demographic Variables

# Race - State
race_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B02001",
  output="wide",
  cache_table = TRUE
)

colnames(race_0)

race_acs19 <- race_0[,c(1:3,5,7,9,11,13,15,17)]

colnames(race_acs19) <- c("GEOID", "NAME", "TotalPop", "WhitePop", "BlackPop", "NativePop", "AsianPop", "PacificIslPop", "OtherPop", "MultiRacePop")

remove(race_0)

# Race - City
demo_race <- left_join(tracts_19_nyc, race_acs19, by = "GEOID")

class(demo_race)
st_crs(demo_race)

race_city_test <- demo_race %>%
  filter(is.na(geometry) == TRUE)
  
remove(race_city_test)
remove(race_acs19)


# Age - State
med_age_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B01002",
  output="wide",
  cache_table = TRUE
)

colnames(med_age_0)

med_age_acs19 <- med_age_0[,c(1:3)]

colnames(med_age_acs19) <- c("GEOID", "NAME", "MedianAge")

remove(med_age_0)

# Age - City
demo_age <- left_join(tracts_19_nyc, med_age_acs19, by = "GEOID")

st_crs(demo_age)

age_city_test <- demo_age %>%
  filter(is.na(geometry) == TRUE)
  
remove(age_city_test)
remove(med_age_acs19)


# Median Income - State
med_income_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B19013",
  output="wide",
  cache_table = TRUE
)

colnames(med_income_0)

med_inc_acs19 <- med_income_0[,c(1:3)]

colnames(med_inc_acs19) <- c("GEOID", "NAME", "MedIncome")

remove(med_income_0)

# Med Income - City
demo_med_inc <- left_join(tracts_19_nyc, med_inc_acs19, by = "GEOID")

demo_med_inc_test <- demo_med_inc %>%
  filter(is.na(geometry) == TRUE)
  
remove(demo_med_inc_test)
remove(med_inc_acs19)


# HH Income - State
hh_income_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B19001",
  output="wide",
  cache_table = TRUE
)

colnames(hh_income_0)

hhc_inc_acs19 <- hh_income_0[,c(1:2,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35)]

colnames(hhc_inc_acs19) <- c("GEOID", "NAME", "Inc_0_10","Inc_10_15","Inc_15_20"
                             ,"Inc_20_25","Inc_25_30","Inc_30_35","Inc_35_40"
                             ,"Inc_40_45","Inc_45_50","Inc_50_60","Inc_60_75"
                             ,"Inc_75_100","Inc_100_125","Inc_125_150"
                             ,"Inc_150_200","Inc_200_Plus")

remove(hh_income_0)

# HH Income - City
demo_hh_inc <- left_join(tracts_19_nyc, hhc_inc_acs19, by = "GEOID")

hhc_inc_city_test <- demo_hh_inc %>%
  filter(is.na(geometry) == TRUE)
  
remove(hhc_inc_city_test)
remove(hhc_inc_acs19)


# Poverty - State
poverty_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B17020",
  output="wide",
  cache_table = TRUE
)

colnames(poverty_0)

poverty_acs19 <- poverty_0[,c(1:3,5)]

colnames(poverty_acs19) <- c("GEOID", "NAME", "TotalPop", "PovertyPop")

remove(poverty_0)

# Med Income - City
demo_poverty <- left_join(tracts_19_nyc, poverty_acs19, by = "GEOID")

demo_poverty_test <- demo_poverty %>%
  filter(is.na(geometry) == TRUE)
  
remove(demo_poverty_test)
remove(poverty_acs19)


# Citizenship - State
# citizenship_0 <- get_acs(
#   geography = "tract",
#   state = "NY",
#   survey = "acs5",
#   year = 2019,
#   table = "B05001",
#   output="wide",
#   cache_table = TRUE
# )
# 
# colnames(citizenship_0)
# 
# citizenship_acs19 <- citizenship_0[,c(1:3,13)]
# 
# colnames(citizenship_acs19) <- c("GEOID", "NAME", "TotalPop", "NonCitizenPop")
# 
# citizenship_acs19$CitizenPop <- citizenship_acs19$TotalPop - citizenship_acs19$NonCitizenPop
# 
# citizenship_acs19 <- citizenship_acs19[,c(1,2,4,5)]
# 
# remove(citizenship_0)
# 
# Citizenship - City
# demo_citizen <- left_join(tracts_19_nyc, citizenship_acs19, by = "GEOID")
# 
# citizenship_city_test <- demo_citizen %>%
#   filter(is.na(geometry) == TRUE)
#   
# remove(citizenship_city_test)
# remove(citizenship_acs19)

Commute Data

# Travel time (residence area) - State
travel_time_res_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B08303",
  output="wide",
  cache_table = TRUE
)

colnames(travel_time_res_0)

t_time_res_acs19 <- travel_time_res_0[,c(1:3,5,7,9,11,13,15,17,19,21,23,25,27)]

colnames(t_time_res_acs19) <- c("GEOID", "NAME", "TotalPop"
                            ,"TT_0_5"
                            ,"TT_5_9"
                            ,"TT_10_14"
                            ,"TT_15_19"
                            ,"TT_20_24"
                            ,"TT_25_29"
                            ,"TT_30_34"
                            ,"TT_35_39"
                            ,"TT_40_44"
                            ,"TT_45_59"
                            ,"TT_60_89"
                            ,"TT_90_Plus")

remove(travel_time_res_0)

# Travel time - City
comm_t_time <- left_join(tracts_19_nyc, t_time_res_acs19, by = "GEOID")

t_time_city_test <- comm_t_time %>%
  filter(is.na(geometry) == TRUE)
  
remove(t_time_city_test)
remove(t_time_res_acs19)


# Means of transport (residence area) - State
means_transport_res_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B08301",
  output="wide",
  cache_table = TRUE
)

colnames(means_transport_res_0)

t_means_res_acs19 <- means_transport_res_0[,c(1:3,5,21,23,25,27,39)]

colnames(t_means_res_acs19) <- c("GEOID", "NAME", "TotalPop"
                            ,"Automobile"
                            ,"PublicTransit"
                            ,"Bus"
                            ,"Subway"
                            ,"CommuterRail"
                            ,"Walked")

remove(means_transport_res_0)

# Means of transport - City
comm_means <- left_join(tracts_19_nyc, t_means_res_acs19, by = "GEOID")

t_means_city_test <- comm_means %>%
  filter(is.na(geometry) == TRUE)
  
remove(t_means_city_test)
remove(t_means_res_acs19)


# Means of transport / travel time (residence area) - State
means_transport_travel_time_res_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B08134",
  output="wide",
  cache_table = TRUE
)

colnames(means_transport_travel_time_res_0)


# Automobile
t_time_auto_res_acs19 <- means_transport_travel_time_res_0[,c(1:3,23,25,27,29,31,33,35,37,39,41)]

colnames(t_time_auto_res_acs19) <- c("GEOID", "NAME"
                                      ,"TotalPop"
                                      ,"TotalAuto"
                                      ,"Auto_TT_0_10"
                                      ,"Auto_TT_10_14"
                                      ,"Auto_TT_15_19"
                                      ,"Auto_TT_20_24"
                                      ,"Auto_TT_25_29"
                                      ,"Auto_TT_30_34"
                                      ,"Auto_TT_35_44"
                                      ,"Auto_TT_45_59"
                                      ,"Auto_TT_60_Plus")

# Auto - City
comm_t_time_auto <- left_join(tracts_19_nyc, t_time_auto_res_acs19, by = "GEOID")

t_time_auto_res_city_test <- comm_t_time_auto %>%
  filter(is.na(geometry) == TRUE)
  
remove(t_time_auto_res_city_test)
remove(t_time_auto_res_acs19)


# Bus
t_time_bus_res_acs19 <- means_transport_travel_time_res_0[,c(1:3,143,145,147,149,151,153,155,157,159, 161)]

colnames(t_time_bus_res_acs19) <- c("GEOID", "NAME"
                                        ,"TotalPop"
                                        ,"TotalBus"
                                        ,"Bus_TT_0_10"
                                        ,"Bus_TT_10_14"
                                        ,"Bus_TT_15_19"
                                        ,"Bus_TT_20_24"
                                        ,"Bus_TT_25_29"
                                        ,"Bus_TT_30_34"
                                        ,"Bus_TT_35_44"
                                        ,"Bus_TT_45_59"
                                        ,"Bus_TT_60_Plus")

# Bus - City
comm_t_time_bus <- left_join(tracts_19_nyc, t_time_bus_res_acs19, by = "GEOID")

t_time_bus_res_city_test <- comm_t_time_bus %>%
  filter(is.na(geometry) == TRUE)
  
remove(t_time_bus_res_city_test)
remove(t_time_bus_res_acs19)


# Vehicles Available
vehicles_res_0 <- get_acs(
  geography = "tract",
  state = "NY",
  survey = "acs5",
  year = 2019,
  table = "B08201",
  output="wide",
  cache_table = TRUE
)


vehicles_avail_res_acs19 <- vehicles_res_0[,c(1:3,5,7,9,11,13)]

colnames(vehicles_avail_res_acs19) <- c("GEOID", "NAME", "TotalPop"
                                 ,"NoVehicle"
                                 ,"OneVehicle"
                                 ,"TwoVehicles"
                                 ,"ThreeVehicles"
                                 ,"FourPlusVehicles")

vehicles_avail_res_acs19$TwoPlusVehicles <- vehicles_avail_res_acs19$TwoVehicles + vehicles_avail_res_acs19$ThreeVehicles + vehicles_avail_res_acs19$FourPlusVehicles


vehicles_avail_res_acs19 <- vehicles_avail_res_acs19[,c(1:5,9)]


remove(vehicles_res_0)

# Vehicles Available - City
comm_vehicles_avail <- left_join(tracts_19_nyc, vehicles_avail_res_acs19, by = "GEOID")

vehicles_avail_res_city_test <- comm_vehicles_avail %>%
  filter(is.na(geometry) == TRUE)
  
comm_vehicles_avail$PctNoVehicle <- round((comm_vehicles_avail$NoVehicle / comm_vehicles_avail$TotalPop)*100,1)
comm_vehicles_avail$PctOneVehicle <- round((comm_vehicles_avail$OneVehicle / comm_vehicles_avail$TotalPop)*100,1)
comm_vehicles_avail$PctMultVehicle <- round((comm_vehicles_avail$TwoPlusVehicles / comm_vehicles_avail$TotalPop)*100,1)


remove(vehicles_avail_res_city_test)
remove(vehicles_avail_res_acs19)

remove(means_transport_travel_time_res_0)

LODES Data

# State Level
ny_rac <- grab_lodes(state = "ny", year = 2019, lodes_type = "rac", job_type = "JT01", 
                     segment = "S000", state_part = "main", agg_geo = "tract")

ny_rac <- ny_rac[,c(1:10,31:42)]

colnames(ny_rac) <- c("Year", "State", "GEOID"
                      , "TotalJobs"
                      , "Workers_0_29"
                      , "Workers_30_54"
                      , "Workers_55_Plus"
                      , "Earnings_0_1250"
                      , "Earnings_1251_3332"
                      , "Earnings_3333_Plus"
                      , "Workers_White"
                      , "Workers_Black"
                      , "Workers_Native"
                      , "Workers_Asian"
                      , "Workers_PacIsl"
                      , "Workers_MixedRace"
                      , "Workers_Not_HispLat"
                      , "Workers_HispLat"
                      , "Less_High_School"
                      , "High_School"
                      , "Some_College"
                      , "Bachelors")


# Join geometry (limits to city inherently)
lodes_rac_nyc <- left_join(tracts_19_nyc, ny_rac, by = "GEOID")

test_rac_join <- lodes_rac_nyc %>%
  filter(is.na(geometry) == TRUE)

remove(test_rac_join)

lodes_rac_nyc <- st_as_sf(lodes_rac_nyc)

lodes_rac_nyc <- st_make_valid(lodes_rac_nyc)

st_crs(lodes_rac_nyc)

class(lodes_rac_nyc)

remove(ny_rac)

# State Level
ny_od <- grab_lodes(state = "ny", year = 2019, lodes_type = "od", job_type = "JT01", 
                    segment = "S000", state_part = "main", agg_geo = "tract")

ny_od <- ny_od[,c(1:11)]


# Join geometry (res)
colnames(ny_od) <- c("Year","State","GEOID_Wrk","GEOID"
                     ,"TotalJobs"
                     ,"Workers_0_29"
                     ,"Workers_30_54"
                     ,"Workers_55_Plus"
                     ,"Earnings_0_1250"
                     ,"Earnings_1251_3332"
                     ,"Earnings_3333_Plus")



ny_od_res_geom <- left_join(ny_od, tracts_19_nyc, by = "GEOID")


# Join geometry (wrk)
colnames(ny_od) <- c("Year","State","GEOID","GEOID_Res"
                     ,"TotalJobs"
                     ,"Workers_0_29"
                     ,"Workers_30_54"
                     ,"Workers_55_Plus"
                     ,"Earnings_0_1250"
                     ,"Earnings_1251_3332"
                     ,"Earnings_3333_Plus")


ny_od_wrk_geom <- left_join(ny_od, tracts_19_nyc, by = "GEOID")

ny_od_wrk_geom <- ny_od_wrk_geom[,c(12:14)]

colnames(ny_od_wrk_geom) <- c("Borough.wrk"
                              ,"BoroughFinal.wrk"
                              ,"geometry.wrk")


# Combine geometries
ny_od_full_geom <- cbind(ny_od_res_geom,ny_od_wrk_geom)

colnames(ny_od_full_geom) <- c("Year","State","GEOID_Wrk","GEOID_Res"
                     ,"TotalJobs"
                     ,"Workers_0_29"
                     ,"Workers_30_54"
                     ,"Workers_55_Plus"
                     ,"Earnings_0_1250"
                     ,"Earnings_1251_3332"
                     ,"Earnings_3333_Plus"
                     ,"Borough.res"
                     ,"BoroughFinal.res"
                     ,"geometry.res"
                     ,"Borough.wrk"
                     ,"BoroughFinal.wrk"
                     ,"geometry.wrk")


# Limit to where origin is within NYC
nyc_od_full_geom <- ny_od_full_geom %>%
  filter(is.na(BoroughFinal.res) == FALSE) 


# Limit to where origin and destination are within NYC
# nyc_od_full_geom <- ny_od_full_geom %>%
#   filter(is.na(BoroughFinal.res) == FALSE & is.na(BoroughFinal.wrk) == FALSE) 


# Remove intermediate tables
remove(ny_od)
remove(ny_od_full_geom)
remove(ny_od_res_geom)
remove(ny_od_wrk_geom)


# Get tract centroids
nyc_od_full_geom$res_point <- st_centroid(nyc_od_full_geom$geometry.res)

nyc_od_full_geom$wrk_point <- st_centroid(nyc_od_full_geom$geometry.wrk)


# Extract coordinates (Res)
res_coords_0 <- unlist(nyc_od_full_geom$res_point)
res_coords_df <- data.frame(res_coords_0)

res_row <- seq_len(nrow(res_coords_df)) %% 2 


res_row_odd <- res_coords_df[res_row == 1, ]
res_row_even <- res_coords_df[res_row == 0, ]

res_coords <- cbind(data.frame(res_row_odd), data.frame(res_row_even))

colnames(res_coords) <- c("Res_Lat", "Res_Long")


# Extract coordinates (Wrk)
wrk_coords_0 <- unlist(nyc_od_full_geom$wrk_point)
wrk_coords_df <- data.frame(wrk_coords_0)

wrk_row <- seq_len(nrow(wrk_coords_df)) %% 2 


wrk_row_odd <- wrk_coords_df[wrk_row == 1, ]
wrk_row_even <- wrk_coords_df[wrk_row == 0, ]

wrk_coords <- cbind(data.frame(wrk_row_odd), data.frame(wrk_row_even))

colnames(wrk_coords) <- c("Wrk_Lat", "Wrk_Long")


# Bind coordinates to OD data
lodes_od_nyc <- cbind(nyc_od_full_geom, res_coords, wrk_coords)

class(lodes_od_nyc)


# Remove intermediate objects
remove(res_coords_0)
remove(res_row)
remove(res_row_even)
remove(res_row_odd)
remove(res_coords)
remove(res_coords_df)
remove(wrk_coords_0)
remove(wrk_row)
remove(wrk_row_even)
remove(wrk_row_odd)
remove(wrk_coords)
remove(wrk_coords_df)
remove(nyc_od_full_geom)


# Make Spatial

lodes_od_nyc <- st_as_sf(lodes_od_nyc)

class(lodes_od_nyc)

lodes_od_nyc <- st_make_valid(lodes_od_nyc)


# Account for non-NYC workers
lodes_od_nyc$BoroughFinal.wrk <- case_when(lodes_od_nyc$BoroughFinal.wrk == "Manhattan" ~ "Manhattan"
                                           ,lodes_od_nyc$BoroughFinal.wrk == "Bronx" ~ "Bronx"
                                           ,lodes_od_nyc$BoroughFinal.wrk == "Queens" ~ "Queens"
                                           ,lodes_od_nyc$BoroughFinal.wrk == "Brooklyn" ~ "Brooklyn"
                                           ,lodes_od_nyc$BoroughFinal.wrk == "Manhattan - Zone" ~ "Manhattan - Zone"
                                           ,lodes_od_nyc$BoroughFinal.wrk == "Staten Island" ~ "Staten Island"
                                           ,is.na(lodes_od_nyc$BoroughFinal.wrk) == TRUE ~ "Outside NYC")

Bus Routes

bus_routes <- st_read("./data/bus_routes_nyc_dec2019.shp")

bus_routes <- st_transform(bus_routes, crs = "EPSG:2263")

bus_routes <- st_make_valid(bus_routes)

st_crs(bus_routes) == st_crs(tracts_19_nyc)

Subway Lines

sub_lines <- st_read("./data/SubwayLines.shp")

sub_lines <- st_transform(sub_lines, crs = "EPSG:2263")

sub_lines <- st_make_valid(sub_lines)

st_crs(sub_lines) == st_crs(tracts_19_nyc)

Borough-Level Exploratory Data Analysis

Demographics

# Race
demo_race$PctWhite <- round((demo_race$WhitePop / demo_race$TotalPop)*100,1)
demo_race$PctBlack <- round((demo_race$BlackPop / demo_race$TotalPop)*100,1)
demo_race$NonWhitePop <- demo_race$TotalPop - demo_race$WhitePop
demo_race$PctNonWhite <- round(100 - demo_race$PctWhite,1)


# Poverty 
demo_poverty$PovRate <- round((demo_poverty$PovertyPop / demo_poverty$TotalPop)*100,1)


# Citizenship
# demo_citizen$PctNonCitizen <- round((demo_citizen$NonCitizenPop / (demo_citizen$CitizenPop + demo_citizen$NonCitizenPop))*100,1)


# Consolidate key variables into one table
race_key <- demo_race %>%
  st_set_geometry(NULL) %>%
  select(GEOID,TotalPop, WhitePop, NonWhitePop, PctWhite, PctNonWhite)

age_key <- demo_age %>%
  st_set_geometry(NULL) %>%
  select(GEOID, MedianAge)

med_inc_key <- demo_med_inc %>%
  st_set_geometry(NULL) %>%
  select(GEOID, MedIncome)

poverty_key <- demo_poverty %>%
  st_set_geometry(NULL) %>%
  select(GEOID, PovRate)


race_key_df <- data.frame(race_key)
age_key_df <- data.frame(age_key)
med_inc_key_df <- data.frame(med_inc_key)
poverty_key_df <- data.frame(poverty_key)


# Join key variables
demo_key_vars_0 <- left_join(race_key_df,age_key_df, by = "GEOID")

demo_key_vars_1 <- left_join(demo_key_vars_0,med_inc_key_df, by = "GEOID")

demo_key_vars_2 <- left_join(demo_key_vars_1,poverty_key_df, by = "GEOID")


# Join geometry
demo_key_vars <- left_join(tracts_19_nyc, demo_key_vars_2, by = "GEOID")

# class(demo_key_vars)


# Drop intermediate tables
remove(race_key)
remove(age_key)
remove(med_inc_key)
remove(poverty_key)
remove(race_key_df)
remove(age_key_df)
remove(med_inc_key_df)
remove(poverty_key_df)
remove(demo_key_vars_0)
remove(demo_key_vars_1)
remove(demo_key_vars_2)

Race

# Race by Borough
demo_race_brough <- demo_race %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal) %>%
  summarize(TotalPop = sum(TotalPop)
            ,WhitePop = sum(WhitePop)
            ,NonWhitePop = sum(NonWhitePop))

demo_race_brough$PctWhite <- demo_race_brough$WhitePop / demo_race_brough$TotalPop
demo_race_brough$PctNonWhite <- demo_race_brough$NonWhitePop / demo_race_brough$TotalPop

demo_race_brough_unpivot <- melt(data.frame(demo_race_brough))

colnames(demo_race_brough_unpivot) <- c("BoroughFinal","Category","Estimate")

demo_race_brough_unpivot_abs <- demo_race_brough_unpivot %>%
  filter(Category == "TotalPop" | Category == "WhitePop" | Category == "NonWhitePop")
  
demo_race_brough_unpivot_pct <- demo_race_brough_unpivot %>%
  filter(Category == "PctWhite" | Category == "PctNonWhite")


# Absolute race figures
ggplot(data = demo_race_brough_unpivot_abs) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.85), width = 0.7) +
  labs(title="Simple Demographics by Borough", subtitle = "Absolute population", x = "Residence Borough", y = "Population", fill = "Race") +
  scale_fill_manual(values = c("TotalPop" = "#003f5c"
                                , "WhitePop" = "#bc5090"
                                , "NonWhitePop" = "#ffa600")
                    ,labels = c("TotalPop" = "Total"
                                , "WhitePop" = "White"
                                , "NonWhitePop" = "NonWhite")) +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Demo/Race_Borough_Abs.png", units = "in", height = 8, width = 12)


# Percentages
ggplot(data = demo_race_brough_unpivot_pct) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Simple Demographics by Borough", subtitle = "Share of population", x = "Residence Borough", y = "Percent of Population", fill = "Race") +
  scale_fill_manual(values = c("PctWhite" = "#bc5090"
                                , "PctNonWhite" = "#ffa600")
                    ,labels = c("PctWhite" = "White"
                                , "PctNonWhite" = "NonWhite")) +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Demo/Race_Borough_Pct.png", units = "in", height = 8, width = 12)


# Remove intermediate tables
remove(demo_race_brough_unpivot)
remove(demo_race_brough_unpivot_abs)
remove(demo_race_brough_unpivot_pct)

Income

# Income by Borough
demo_med_inc_brough_0 <- demo_key_vars %>%
  st_set_geometry(NULL) %>%
  filter(is.na(MedIncome) == FALSE) %>%
  select(GEOID, BoroughFinal, MedIncome, TotalPop)


demo_borough_pop <- demo_key_vars %>%
  st_set_geometry(NULL) %>%
  filter(is.na(MedIncome) == FALSE) %>%
  group_by(BoroughFinal) %>%
  summarize(BorPop = sum(TotalPop))


demo_med_inc_brough_1 <- left_join(demo_med_inc_brough_0, demo_borough_pop, by = "BoroughFinal")


demo_med_inc_brough_1$weighted_income <- demo_med_inc_brough_1$MedIncome * demo_med_inc_brough_1$TotalPop


demo_med_inc_brough <- demo_med_inc_brough_1 %>%
  group_by(BoroughFinal) %>%
  summarize(MedIncome = sum(weighted_income)/mean(BorPop))

# Income Plot
ggplot(data = demo_med_inc_brough) +
  geom_col(aes(x = as_factor(BoroughFinal), y = MedIncome), fill = "#58508d") +
  labs(title="Median Income by Borough", subtitle = "Weighted by population across census tracts", x = "Residence Borough", y = "Median Household Income ($)") +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Demo/Inc_Borough.png", units = "in", height = 8, width = 12)

remove(demo_med_inc_brough_0)
remove(demo_med_inc_brough_1)
remove(demo_borough_pop)

Age

# Age by Borough
demo_age_brough_0 <- demo_key_vars %>%
  st_set_geometry(NULL) %>%
  filter(is.na(MedianAge) == FALSE) %>%
  select(GEOID, BoroughFinal, MedianAge, TotalPop)


demo_borough_pop <- demo_key_vars %>%
  st_set_geometry(NULL) %>%
  filter(is.na(MedIncome) == FALSE) %>%
  group_by(BoroughFinal) %>%
  summarize(BorPop = sum(TotalPop))


demo_age_brough_1 <- left_join(demo_age_brough_0, demo_borough_pop, by = "BoroughFinal")


demo_age_brough_1$weighted_age <- demo_age_brough_1$MedianAge * demo_age_brough_1$TotalPop


demo_age_brough <- demo_age_brough_1 %>%
  group_by(BoroughFinal) %>%
  summarize(MedianAge = sum(weighted_age)/mean(BorPop))


# Age Plot
ggplot(data = demo_age_brough) +
  geom_col(aes(x = as_factor(BoroughFinal), y = MedianAge), fill = "#ff6361") +
  labs(title="Median Age by Borough", subtitle = "Weighted by population across census tracts", x = "Residence Borough", y = "Median Age") +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Demo/Age_Borough.png", units = "in", height = 8, width = 12)

remove(demo_age_brough_0)
remove(demo_age_brough_1)
remove(demo_borough_pop)

Commute

Vehicular Access

# Vehicle Access by Borough
comm_vehicles_avail_brough <- comm_vehicles_avail %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal) %>%
  summarize(TotalPop = sum(TotalPop)
            ,NoVehicle = sum(NoVehicle)
            ,OneVehicle = sum(OneVehicle)
            ,MultipleVehicles = sum(TwoPlusVehicles))


comm_vehicles_avail_brough$PctNoVehicle <-  comm_vehicles_avail_brough$NoVehicle / comm_vehicles_avail_brough$TotalPop
comm_vehicles_avail_brough$PctOneVehicle <- comm_vehicles_avail_brough$OneVehicle / comm_vehicles_avail_brough$TotalPop
comm_vehicles_avail_brough$PctMultipleVehicles <- comm_vehicles_avail_brough$MultipleVehicles / comm_vehicles_avail_brough$TotalPop


comm_vehicles_avail_brough_unpivot <- melt(data.frame(comm_vehicles_avail_brough))

colnames(comm_vehicles_avail_brough_unpivot) <- c("BoroughFinal","Category","Estimate")

comm_vehicles_avail_brough_unpivot_abs <- comm_vehicles_avail_brough_unpivot %>%
  filter(Category == "NoVehicle" | Category == "OneVehicle" | Category == "MultipleVehicles")
  
comm_vehicles_avail_brough_unpivot_pct <- comm_vehicles_avail_brough_unpivot %>%
  filter(Category == "PctNoVehicle" | Category == "PctOneVehicle" | Category == "PctMultipleVehicles")


# Absolute vehicles figure
ggplot(data = comm_vehicles_avail_brough_unpivot_abs) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Vehicular Access by Borough", subtitle = "Total households", x = "Residence Borough", y = "Households", fill = "Vehicles Available") +
  scale_fill_manual(values = c("NoVehicle" = "#003f5c"
                                , "OneVehicle" = "#bc5090"
                                , "MultipleVehicles" = "#ffa600")
                    ,labels = c("NoVehicle" = "None"
                                , "OneVehicle" = "One"
                                , "MultipleVehicles" = "Multiple")) +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Vehicle_Access_Borough_Abs.png", units = "in", height = 8, width = 12)


# Percentages
ggplot(data = comm_vehicles_avail_brough_unpivot_pct) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Vehicular Access by Borough", subtitle = "Share of households", x = "Residence Borough", y = "Percent of Households", fill = "Vehicles Available") +
  scale_fill_manual(values = c("PctNoVehicle" = "#003f5c"
                                , "PctOneVehicle" = "#bc5090"
                                , "PctMultipleVehicles" = "#ffa600")
                    ,labels = c("PctNoVehicle" = "None"
                                , "PctOneVehicle" = "One"
                                , "PctMultipleVehicles" = "Multiple")) +  
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Vehicle_Access_Borough_Pct.png", units = "in", height = 8, width = 12)


# Remove intermediate tables
remove(comm_vehicles_avail_brough_unpivot)
remove(comm_vehicles_avail_brough_unpivot_abs)
remove(comm_vehicles_avail_brough_unpivot_pct)

Means of Commuting

comm_means_brough <- comm_means %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal) %>%
  summarize(TotalPop = sum(TotalPop),
            Auto = sum(Automobile)
            ,Transit = sum(PublicTransit)
            ,Bus = sum(Bus)
            ,Subway = sum(Subway)
            ,CommuterRail = sum(CommuterRail)
            ,Walk = sum(Walked))


comm_means_brough$PctAuto <-  comm_means_brough$Auto / comm_means_brough$TotalPop
comm_means_brough$PctTransit <- comm_means_brough$Transit / comm_means_brough$TotalPop
comm_means_brough$PctBus <- comm_means_brough$Bus / comm_means_brough$TotalPop
comm_means_brough$PctSubway <- comm_means_brough$Subway / comm_means_brough$TotalPop
comm_means_brough$PctCommuterRail <- comm_means_brough$CommuterRail / comm_means_brough$TotalPop
comm_means_brough$PctWalked <- comm_means_brough$Walk / comm_means_brough$TotalPop


comm_means_brough_unpivot <- melt(data.frame(comm_means_brough))

colnames(comm_means_brough_unpivot) <- c("BoroughFinal","Category","Estimate")

# Simple categories
comm_means_brough_unpivot_abs_simple <- comm_means_brough_unpivot %>%
  filter(Category == "Auto" | Category == "Transit" | Category == "Walk")
  
comm_means_brough_unpivot_pct_simple <- comm_means_brough_unpivot %>%
  filter(Category == "PctAuto" | Category == "PctTransit" | Category == "PctWalked")


# Breakdown of transit
comm_means_brough_unpivot_abs_transit <- comm_means_brough_unpivot %>%
  filter(Category == "Auto" | Category == "Bus" | Category == "Subway" | Category == "CommuterRail" | Category == "Walk")
  
comm_means_brough_unpivot_pct_transit <- comm_means_brough_unpivot %>%
  filter(Category == "PctAuto" | Category == "PctBus" | Category == "PctSubway" | Category == "PctCommuterRail" | Category == "PctWalked")


# Simple
ggplot(data = comm_means_brough_unpivot_abs_simple) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Means of Commuting", subtitle = "Total workers", x = "Residence Borough", y = "Workers", fill = "Means of Transport") +
  scale_fill_manual(values = c("Auto" = "#003f5c"
                                , "Transit" = "#bc5090"
                                , "Walk" = "#ffa600")
                    ,labels = c("Auto" = "Car"
                                , "Transit" = "Transit"
                                , "Walk" = "Walk")) +  
   scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Means_Borough_Simple_Abs.png", units = "in", height = 8, width = 12)


# Simple - Percent
ggplot(data = comm_means_brough_unpivot_pct_simple) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Means of Commuting", subtitle = "Share of workers", x = "Residence Borough", y = "Percent of Workers", fill = "Means of Transport") +
  scale_fill_manual(values = c("PctAuto" = "#003f5c"
                                , "PctTransit" = "#bc5090"
                                , "PctWalked" = "#ffa600")
                    ,labels = c("PctAuto" = "Car"
                                , "PctTransit" = "Transit"
                                , "PctWalked" = "Walk")) +  
   scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Means_Borough_Simple_Pct.png", units = "in", height = 8, width = 12)



# Advanced
ggplot(data = comm_means_brough_unpivot_abs_transit) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Means of Commuting", subtitle = "Total workers - All methods of transportation", x = "Residence Borough", y = "Workers", fill = "Means of Transport") +
  scale_fill_manual(values = c("Auto" = "#003f5c"
                                , "Bus" = "#58508d"
                                , "Subway" = "#bc5090"
                                , "CommuterRail" = "#ff6361"
                                , "Walk" = "#ffa600")
                    ,labels = c("Auto" = "Car"
                                , "Bus" = "Bus"
                                , "Subway" = "Subway"
                                , "CommuterRail" = "Commuter Rail"
                                , "Walk" = "Walk")) +  
   scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Means_Borough_Advanced_Abs.png", units = "in", height = 8, width = 12)


# Advanced - Percent
ggplot(data = comm_means_brough_unpivot_pct_transit) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Means of Commuting", subtitle = "Share of workers - All methods of transportation", x = "Residence Borough", y = "Percent of Workers", fill = "Means of Transport") +
  scale_fill_manual(values = c("PctAuto" = "#003f5c"
                                , "PctBus" = "#58508d"
                                , "PctSubway" = "#bc5090"
                                , "PctCommuterRail" = "#ff6361"
                                , "PctWalked" = "#ffa600")
                    ,labels = c("PctAuto" = "Car"
                                , "PctBus" = "Bus"
                                , "PctSubway" = "Subway"
                                , "PctCommuterRail" = "Commuter Rail"
                                , "PctWalked" = "Walk")) +  
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Means_Borough_Advanced_Pct.png", units = "in", height = 8, width = 12)


remove(comm_means_brough_unpivot)
remove(comm_means_brough_unpivot_abs_simple)
remove(comm_means_brough_unpivot_pct_simple)
remove(comm_means_brough_unpivot_abs_transit)
remove(comm_means_brough_unpivot_pct_transit)

Travel Times

Automobile

# Auto Travelers
comm_t_time_auto_brough <- comm_t_time_auto %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal) %>%
  summarize(TotalAuto = sum(TotalAuto)
            ,UpTo15Mins  = sum(Auto_TT_0_10) + sum(Auto_TT_10_14)
            ,UpTo30Mins = sum(Auto_TT_15_19) + sum(Auto_TT_20_24) + sum(Auto_TT_25_29)
            ,UpTo45Mins = sum(Auto_TT_30_34) + sum(Auto_TT_35_44)
            ,Upto60Mins = sum(Auto_TT_45_59)
            ,MoreThan60Mins = sum(Auto_TT_60_Plus))

comm_t_time_auto_brough$Pct15 <- comm_t_time_auto_brough$UpTo15Mins / comm_t_time_auto_brough$TotalAuto
comm_t_time_auto_brough$Pct30 <- comm_t_time_auto_brough$UpTo30Mins / comm_t_time_auto_brough$TotalAuto
comm_t_time_auto_brough$Pct45 <- comm_t_time_auto_brough$UpTo45Mins / comm_t_time_auto_brough$TotalAuto
comm_t_time_auto_brough$Pct60 <- comm_t_time_auto_brough$Upto60Mins / comm_t_time_auto_brough$TotalAuto
comm_t_time_auto_brough$Pct100 <- comm_t_time_auto_brough$MoreThan60Mins / comm_t_time_auto_brough$TotalAuto

comm_t_time_auto_brough_unpivot <- melt(data.frame(comm_t_time_auto_brough))

colnames(comm_t_time_auto_brough_unpivot) <- c("BoroughFinal","Category","Estimate")


comm_t_time_auto_brough_unpivot_abs <- comm_t_time_auto_brough_unpivot %>%
  filter(Category == "UpTo15Mins" | Category == "UpTo30Mins" | Category == "UpTo45Mins"| Category == "Upto60Mins" | Category == "MoreThan60Mins" )
  
comm_t_time_auto_brough_unpivot_pct <- comm_t_time_auto_brough_unpivot %>%
  filter(Category == "Pct15" | Category == "Pct30" | Category == "Pct45"| Category == "Pct60" | Category == "Pct100" )


# Absolute figure
ggplot(data = comm_t_time_auto_brough_unpivot_abs) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Travel Time", subtitle = "Auto commuters", x = "Residence Borough", y = "Drivers", fill = "Average Commute Time") +
  scale_fill_manual(values = c("UpTo15Mins" = "#003f5c"
                                , "UpTo30Mins" = "#58508d"
                                , "UpTo45Mins" = "#bc5090"
                                , "Upto60Mins" = "#ff6361"
                                , "MoreThan60Mins" = "#ffa600")
                    ,labels = c("UpTo15Mins" = "0 - 14 Minutes"
                                , "UpTo30Mins" = "15 - 29 Minutes"
                                , "UpTo45Mins" = "30 - 44 Minutes"
                                , "Upto60Mins" = "45 - 59 Minutes"
                                , "MoreThan60Mins" = "60+ Minutes")) + 
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Time_Auto_Borough_Abs.png", units = "in", height = 8, width = 12)


# Percentage figure
ggplot(data = comm_t_time_auto_brough_unpivot_pct) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Travel Time", subtitle = "Share of auto commuters", x = "Residence Borough", y = "Percent of Drivers", fill = "Average Commute Time") +
  scale_fill_manual(values = c("Pct15" = "#003f5c"
                                , "Pct30" = "#58508d"
                                , "Pct45" = "#bc5090"
                                , "Pct60" = "#ff6361"
                                , "Pct100" = "#ffa600")
                    ,labels = c("Pct15" = "0 - 14 Minutes"
                                , "Pct30" = "15 - 29 Minutes"
                                , "Pct45" = "30 - 44 Minutes"
                                , "Pct60" = "45 - 59 Minutes"
                                , "Pct100" = "60+ Minutes")) + 
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Time_Auto_Borough_Pct.png", units = "in", height = 8, width = 12)

remove(comm_t_time_auto_brough_unpivot)
remove(comm_t_time_auto_brough_unpivot_abs)
remove(comm_t_time_auto_brough_unpivot_pct)

Bus

# Bus Travelers
comm_t_time_bus_brough <- comm_t_time_bus %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal) %>%
  summarize(TotalBus = sum(TotalBus)
            ,UpTo15Mins  = sum(Bus_TT_0_10) + sum(Bus_TT_10_14)
            ,UpTo30Mins = sum(Bus_TT_15_19) + sum(Bus_TT_20_24) + sum(Bus_TT_25_29)
            ,UpTo45Mins = sum(Bus_TT_30_34) + sum(Bus_TT_35_44)
            ,Upto60Mins = sum(Bus_TT_45_59)
            ,MoreThan60Mins = sum(Bus_TT_60_Plus))

comm_t_time_bus_brough$Pct15 <- comm_t_time_bus_brough$UpTo15Mins / comm_t_time_bus_brough$TotalBus
comm_t_time_bus_brough$Pct30 <- comm_t_time_bus_brough$UpTo30Mins / comm_t_time_bus_brough$TotalBus
comm_t_time_bus_brough$Pct45 <- comm_t_time_bus_brough$UpTo45Mins / comm_t_time_bus_brough$TotalBus
comm_t_time_bus_brough$Pct60 <- comm_t_time_bus_brough$Upto60Mins / comm_t_time_bus_brough$TotalBus
comm_t_time_bus_brough$Pct100 <- comm_t_time_bus_brough$MoreThan60Mins / comm_t_time_bus_brough$TotalBus

comm_t_time_bus_brough_unpivot <- melt(data.frame(comm_t_time_bus_brough))

colnames(comm_t_time_bus_brough_unpivot) <- c("BoroughFinal","Category","Estimate")


comm_t_time_bus_brough_unpivot_abs <- comm_t_time_bus_brough_unpivot %>%
  filter(Category == "UpTo15Mins" | Category == "UpTo30Mins" | Category == "UpTo45Mins"| Category == "Upto60Mins" | Category == "MoreThan60Mins" )
  
comm_t_time_bus_brough_unpivot_pct <- comm_t_time_bus_brough_unpivot %>%
  filter(Category == "Pct15" | Category == "Pct30" | Category == "Pct45"| Category == "Pct60" | Category == "Pct100" )


# Absolute figure
ggplot(data = comm_t_time_bus_brough_unpivot_abs) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Travel Time", subtitle = "Bus commuters", x = "Residence Borough", y = "Riders", fill = "Average Commute Time") +
  scale_fill_manual(values = c("UpTo15Mins" = "#003f5c"
                                , "UpTo30Mins" = "#58508d"
                                , "UpTo45Mins" = "#bc5090"
                                , "Upto60Mins" = "#ff6361"
                                , "MoreThan60Mins" = "#ffa600")
                    ,labels = c("UpTo15Mins" = "0 - 14 Minutes"
                                , "UpTo30Mins" = "15 - 29 Minutes"
                                , "UpTo45Mins" = "30 - 44 Minutes"
                                , "Upto60Mins" = "45 - 59 Minutes"
                                , "MoreThan60Mins" = "60+ Minutes")) + 
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Time_Bus_Borough_Abs.png", units = "in", height = 8, width = 12)


# Percentage figure
ggplot(data = comm_t_time_bus_brough_unpivot_pct) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) +
  labs(title="Travel Time", subtitle = "Share of bus commuters", x = "Residence Borough", y = "Percent of Riders", fill = "Average Commute Time") +
  scale_fill_manual(values = c("Pct15" = "#003f5c"
                                , "Pct30" = "#58508d"
                                , "Pct45" = "#bc5090"
                                , "Pct60" = "#ff6361"
                                , "Pct100" = "#ffa600")
                    ,labels = c("Pct15" = "0 - 14 Minutes"
                                , "Pct30" = "15 - 29 Minutes"
                                , "Pct45" = "30 - 44 Minutes"
                                , "Pct60" = "45 - 59 Minutes"
                                , "Pct100" = "60+ Minutes")) + 
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.text.x = element_text(angle = -45)
        , axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10)))

ggsave("./graphics/Commute/Time_Bus_Borough_Pct.png", units = "in", height = 8, width = 12)

remove(comm_t_time_bus_brough_unpivot)
remove(comm_t_time_bus_brough_unpivot_abs)
remove(comm_t_time_bus_brough_unpivot_pct)

Exploratory Data Maps

Orientation Map

brough_pal <- c("#003f5c","#3283A8","#d62828","#f77f00","#fcbf49","#eae2b7")


borough_geom <- tracts_19_nyc %>%
  group_by(BoroughFinal) %>%
  summarise(geometry = sf::st_union(geometry))

cong_zone$Borough <- c("Congestion Zone")


tmap_mode("plot")

orientation_map <- tm_shape(borough_geom) + 
  tm_polygons(col = "BoroughFinal", palette = brough_pal,
              title = "Borough") +
  tm_borders(alpha = 0) +
tm_shape(bus_routes) +
  tm_lines(col = "grey40", lwd = 0.5) +
  tm_add_legend(type="line",
                labels ="Bus Routes",
                alpha=NA,
                col="grey40",
                lwd= 1) + 
tm_shape(sub_lines) +
  tm_lines(col = "cornsilk1", lwd = 1) +
  tm_add_legend(type="line",
                labels ="Subway Lines",
                alpha=NA,
                col="cornsilk1",
                lwd= 2) +    
tm_shape(cong_zone) +
  tm_borders(col = "#0CF7B5", lwd = 2) +
  tm_add_legend(type="line",
                labels ="Congestion Zone",
                alpha=NA,
                col="#0CF7B5",
                lwd= 2) +
tm_layout(title = "New York City")

orientation_map

tmap_save(orientation_map,"./Graphics/Maps/orientation_map.png", units = "in", height = 10, width = 10)

Earnings by Residence Area - All Workers

# Income by Res Area
od_res_all_chart <- lodes_od_nyc %>%
  filter(BoroughFinal.wrk != "Manhattan - Zone") %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal.res) %>%
  summarize(TotalWorkers = sum(TotalJobs)
            ,Earnings_0_1250 = sum(Earnings_0_1250)
            ,Earnings_1251_3332 = sum(Earnings_1251_3332)
            ,Earnings_3333_Plus = sum(Earnings_3333_Plus))


od_res_all_chart$Pct1250 <- od_res_all_chart$Earnings_0_1250 / od_res_all_chart$TotalWorkers
od_res_all_chart$Pct3332 <- od_res_all_chart$Earnings_1251_3332 / od_res_all_chart$TotalWorkers
od_res_all_chart$Pct3333 <- od_res_all_chart$Earnings_3333_Plus / od_res_all_chart$TotalWorkers


od_res_all_chart_1 <- od_res_all_chart[,c(1,8,7,6)]

od_res_all_chart_unpivot <- melt(data.frame(od_res_all_chart_1))

colnames(od_res_all_chart_unpivot) <- c("Borough","Category","Estimate")


ggplot(data = od_res_all_chart_unpivot) +
  geom_col(aes(x = as_factor(Borough), y = Estimate, fill = Category, label = Estimate), position = "fill", width = 0.7) +
  labs(title="Earnings by Residence Area", subtitle = "Workers Not Commuting To/Within Congestion Zone", x = "Residence Borough", y = "Percent of Workers", fill = "Monthly Earnings ($)") +
  scale_fill_manual(values = c("Pct3333" = "#003f5c"
                                , "Pct3332" = "#bc5090"
                                , "Pct1250" = "#ffa600")
                    ,labels = c("Pct3333" = "3,333+"
                                , "Pct3332" = "1,251 - 3,332"
                                , "Pct1250" = "0 - 1,250")) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10))) +
  coord_flip()

ggsave("./graphics/Lodes/Earnings_Res_Borough_All.png", units = "in", height = 8, width = 12)

remove(od_res_all_chart_1)
remove(od_res_all_chart_unpivot)

Earnings by Residence Area - “Zone” Workers

# Income by Res Area
od_res_zone_chart <- lodes_od_nyc %>%
  filter(BoroughFinal.wrk == "Manhattan - Zone") %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal.res) %>%
  summarize(TotalWorkers = sum(TotalJobs)
            ,Earnings_0_1250 = sum(Earnings_0_1250)
            ,Earnings_1251_3332 = sum(Earnings_1251_3332)
            ,Earnings_3333_Plus = sum(Earnings_3333_Plus))


od_res_zone_chart$Pct1250 <- od_res_zone_chart$Earnings_0_1250 / od_res_zone_chart$TotalWorkers
od_res_zone_chart$Pct3332 <- od_res_zone_chart$Earnings_1251_3332 / od_res_zone_chart$TotalWorkers
od_res_zone_chart$Pct3333 <- od_res_zone_chart$Earnings_3333_Plus / od_res_zone_chart$TotalWorkers


od_res_zone_chart_1 <- od_res_zone_chart[,c(1,8,7,6)]

od_res_zone_chart_unpivot <- melt(data.frame(od_res_zone_chart_1))

colnames(od_res_zone_chart_unpivot) <- c("Borough","Category","Estimate")


ggplot(data = od_res_zone_chart_unpivot) +
  geom_col(aes(x = as_factor(Borough), y = Estimate, fill = Category, label = Estimate), position = "fill", width = 0.7) +
  labs(title="Earnings by Residence Area", subtitle = "Workers Commuting To/Within Congestion Zone", x = "Residence Borough", y = "Percent of Workers", fill = "Monthly Earnings ($)") +
  scale_fill_manual(values = c("Pct3333" = "#003f5c"
                                , "Pct3332" = "#bc5090"
                                , "Pct1250" = "#ffa600")
                    ,labels = c("Pct3333" = "3,333+"
                                , "Pct3332" = "1,251 - 3,332"
                                , "Pct1250" = "0 - 1,250")) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10))) +
  coord_flip()

ggsave("./graphics/Lodes/Earnings_Res_Borough_Zone.png", units = "in", height = 8, width = 12)

remove(od_res_zone_chart_1)
remove(od_res_zone_chart_unpivot)

Earnings by Work Area - All Workers

# Income by Wrk Area
od_wrk_all_chart <- lodes_od_nyc %>%
  st_set_geometry(NULL) %>%
  group_by(BoroughFinal.wrk) %>%
  summarize(TotalWorkers = sum(TotalJobs)
            ,Earnings_0_1250 = sum(Earnings_0_1250)
            ,Earnings_1251_3332 = sum(Earnings_1251_3332)
            ,Earnings_3333_Plus = sum(Earnings_3333_Plus))


od_wrk_all_chart$Pct1250 <- od_wrk_all_chart$Earnings_0_1250 / od_wrk_all_chart$TotalWorkers
od_wrk_all_chart$Pct3332 <- od_wrk_all_chart$Earnings_1251_3332 / od_wrk_all_chart$TotalWorkers
od_wrk_all_chart$Pct3333 <- od_wrk_all_chart$Earnings_3333_Plus / od_wrk_all_chart$TotalWorkers


od_wrk_all_chart_1 <- od_wrk_all_chart[,c(1,8,7,6)]

od_wrk_all_chart_unpivot <- melt(data.frame(od_wrk_all_chart_1))

colnames(od_wrk_all_chart_unpivot) <- c("Borough","Category","Estimate")


ggplot(data = od_wrk_all_chart_unpivot) +
  geom_col(aes(x = as_factor(Borough), y = Estimate, fill = Category), position = "fill", width = 0.7) +
  labs(title="Earnings by Work Area", subtitle = "All Workers Living in NYC", x = "Work Borough", y = "Percent of Workers", fill = "Monthly Earnings ($)") +
  scale_fill_manual(values = c("Pct3333" = "#003f5c"
                                , "Pct3332" = "#bc5090"
                                , "Pct1250" = "#ffa600")
                    ,labels = c("Pct3333" = "3,333+"
                                , "Pct3332" = "1,251 - 3,332"
                                , "Pct1250" = "0 - 1,250")) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10))) +
  coord_flip()

ggsave("./graphics/Lodes/Earnings_Wrk_Borough_All.png", units = "in", height = 8, width = 12)

remove(od_wrk_all_chart_1)
remove(od_wrk_all_chart_unpivot)

Map: Flows by Residence Area - “Zone” Workers

See Appendix A.4

Map: Flows by Residence Area - “Zone” Low-Income Workers

See Appendix A.5

Workers Priced by Res Area and…

# Drivers by Tract
driver_pct_tract_lu <- comm_means[,c(1,3,5,6,12)]

driver_pct_tract_lu$PctDrive <- round(driver_pct_tract_lu$Automobile/driver_pct_tract_lu$TotalPop,2)

driver_pct_tract_lu <- driver_pct_tract_lu %>%
  st_set_geometry(NULL) %>%
  select(GEOID, BoroughFinal, PctDrive)


# Workers into Zone by Tract
colnames(lodes_od_nyc_zone_workers)

wrk_flows_tract_base <- lodes_od_nyc_zone_workers %>%
  st_set_geometry(NULL) %>%
  group_by(GEOID_Res) %>%
  summarize(FlowsToZone = sum(TotalJobs)
            , Earn_1250_Zone = sum(Earnings_0_1250)
            , Earn_3332_Zone = sum(Earnings_1251_3332)
            , Earn_3333_Zone = sum (Earnings_3333_Plus))

colnames(wrk_flows_tract_base)

colnames(wrk_flows_tract_base)[1] <- "GEOID"

colnames(wrk_flows_tract_base)


# Combine
wrk_flows_drive_discount_0 <- left_join(wrk_flows_tract_base, driver_pct_tract_lu, by = "GEOID")

# wrk_flows_drive_discount_0 %>%
#   filter(is.na(PctDrive) == TRUE)

wrk_flows_drive_discount <- wrk_flows_drive_discount_0 %>%
  filter(is.na(PctDrive) == FALSE)

remove(wrk_flows_drive_discount_0)


# Apply discount
wrk_flows_drive_discount$Priced_Pop <- round(wrk_flows_drive_discount$FlowsToZone * wrk_flows_drive_discount$PctDrive,0)
wrk_flows_drive_discount$Priced_Earn_1250 <- round(wrk_flows_drive_discount$Earn_1250_Zone * wrk_flows_drive_discount$PctDrive,0)
wrk_flows_drive_discount$Priced_Earn_3332 <- round(wrk_flows_drive_discount$Earn_3332_Zone * wrk_flows_drive_discount$PctDrive,0)
wrk_flows_drive_discount$Priced_Earn_3333 <- round(wrk_flows_drive_discount$Earn_3333_Zone * wrk_flows_drive_discount$PctDrive,0)


wrk_flows_drive_discount <- wrk_flows_drive_discount[,c(1,6,8:11)]

# Group to Borough Level
# wrk_flows_drive_brough <- wrk_flows_drive_discount %>%
#   group_by(BoroughFinal) %>%
#   summarize(Priced_Pop = round(sum(Priced_Pop),0)
#             , Priced_Earn_1250 = round(sum(Priced_Earn_1250),0)
#             , Priced_Earn_3332 = round(sum(Priced_Earn_3332),0)
#             , Priced_Earn_3333 = round(sum(Priced_Earn_3333),0)) 
# 
# remove(driver_pct_tract_lu)
# remove(wrk_flows_tract_base)

# Workers' Racial Composition
colnames(lodes_rac_nyc)

lodes_rac_nyc$PctWhite <- (lodes_rac_nyc$Workers_White/lodes_rac_nyc$TotalJobs)
lodes_rac_nyc$PctNonWhite <- ((lodes_rac_nyc$TotalJobs - lodes_rac_nyc$Workers_White)/lodes_rac_nyc$TotalJobs)

lodes_rac_discount <- lodes_rac_nyc[,c("GEOID", "TotalJobs","PctWhite", "PctNonWhite")]

lodes_rac_discount <- lodes_rac_discount %>%
  st_set_geometry(NULL)

# Join it all together 
wrk_flows_drive_join <- left_join(wrk_flows_drive_discount, lodes_rac_discount, by = "GEOID")


# Priced by race fields
wrk_flows_drive_join$Priced_White <- round(wrk_flows_drive_join$Priced_Pop * wrk_flows_drive_join$PctWhite,0)
wrk_flows_drive_join$Priced_NonWhite <- round(wrk_flows_drive_join$Priced_Pop * wrk_flows_drive_join$PctNonWhite,0)


work_flows_drive_final <- wrk_flows_drive_join

remove(wrk_flows_drive_join)

# Roll up to borough level
colnames(work_flows_drive_final)

work_flows_drive_borough <- work_flows_drive_final %>%
  group_by(BoroughFinal) %>%
  summarize(Population = sum(TotalJobs)
            , Priced_Pop = sum(Priced_Pop)
            , Priced_Earn_1250 = sum(Priced_Earn_1250)
            , Priced_Earn_3332 = sum(Priced_Earn_3332)
            , Priced_Earn_3333 = sum(Priced_Earn_3333)
            , Priced_White = sum(Priced_White)
            , Priced_NonWhite = sum(Priced_NonWhite))


# Priced by race fields
colnames(work_flows_drive_borough)

work_flows_drive_borough$PctPriced <- work_flows_drive_borough$Priced_Pop/work_flows_drive_borough$Population
work_flows_drive_borough$PctPriced_1250 <- work_flows_drive_borough$Priced_Earn_1250/work_flows_drive_borough$Population
work_flows_drive_borough$PctPriced_3332 <- work_flows_drive_borough$Priced_Earn_3332/work_flows_drive_borough$Population
work_flows_drive_borough$PctPriced_3333 <- work_flows_drive_borough$Priced_Earn_3333/work_flows_drive_borough$Population
work_flows_drive_borough$PctPriced_White <- work_flows_drive_borough$Priced_White/work_flows_drive_borough$Population
work_flows_drive_borough$PctPriced_NonWhite <- work_flows_drive_borough$Priced_NonWhite/work_flows_drive_borough$Population


# Melt the data frame
colnames(work_flows_drive_borough)

work_flows_drive_final_melt <- work_flows_drive_borough[,c(1,9:14)]

work_flows_drive_final_melt <- melt(data.frame(work_flows_drive_final_melt))

colnames(work_flows_drive_final_melt) <- c("BoroughFinal", "Category", "Estimate")

remove(wrk_flows_drive_join)
remove(wrk_flows_tract_base)
remove(work_flows_drive_discount)
remove(work_flows_drive_final)

… Income

plot_inc_flows <- work_flows_drive_final_melt %>%
  filter(Category == "PctPriced_1250" | Category == "PctPriced_3332" | Category == "PctPriced_3333")

ggplot(data = plot_inc_flows) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) + 
  labs(title="Earnings of Priced Workers by Residence Area", subtitle="Share of working population", x = "Residence Borough", y = "Percent of Workers", fill = "Monthly Earnings ($)") +
  scale_fill_manual(values = c("PctPriced_3333" = "#003f5c" 
                                , "PctPriced_3332" = "#bc5090"
                                , "PctPriced_1250" = "#ffa600")
                    ,labels = c("PctPriced_3333" = "3,333+"
                                , "PctPriced_3332" = "1,251 - 3,332"
                                , "PctPriced_1250" = "0 - 1,250")) +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10))) +
  coord_flip()

ggsave("./graphics/Lodes/Priced_Earnings_Borough_Pct.png", units = "in", height = 8, width = 12)

remove(plot_inc_flows)

… Race

plot_race_flows <- work_flows_drive_final_melt %>%
  filter(Category == "PctPriced_White" | Category == "PctPriced_NonWhite")

ggplot(data = plot_race_flows) +
  geom_col(aes(x = as_factor(BoroughFinal), y = Estimate, fill = Category), position = position_dodge(width = 0.8), width = 0.7) + 
  labs(title="Race of Priced Workers by Residence Area", subtitle="Share of working population", x = "Residence Borough", y = "Percent of Workers", fill = "Worker Race") +
  scale_fill_manual(values = c("PctPriced_NonWhite" = "#003f5c"
                                , "PctPriced_White" = "#ffa600")
                    ,labels = c("PctPriced_NonWhite" = "Non-White"
                                , "PctPriced_White" = "White")) +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.ticks = element_blank()
        , axis.title.x = element_text(margin=margin(t=10))
        , axis.title.y = element_text(margin=margin(r=10))) +
  coord_flip()

ggsave("./graphics/Lodes/Priced_Race_Borough_Pct.png", units = "in", height = 8, width = 12)

remove(plot_race_flows)

Flow Map

#Subset 
flow_chart_0 <- wrk_flows_drive_discount[,c(1:3)]


# Bring in Zone Geometry
cong_zone_centroid <- st_centroid(cong_zone)

cong_coords_0 <- unlist(cong_zone_centroid$geometry)
cong_coords_df <- data.frame(cong_coords_0)

cong_row <- seq_len(nrow(cong_coords_df)) %% 2 


cong_row_odd <- cong_coords_df[cong_row == 1, ]
cong_row_even <- cong_coords_df[cong_row == 0, ]

cong_coords <- cbind(data.frame(cong_row_odd), data.frame(cong_row_even))

colnames(cong_coords) <- c("Cong_Lat", "Cong_Long")


remove(cong_zone_centroid)
remove(cong_coords_0)
remove(cong_coords_df)
remove(cong_row)
remove(cong_row_odd)
remove(cong_row_even)


# Bring ins Res Geometry
res_geom <- lodes_od_nyc %>%
  st_set_geometry(NULL) %>%
  select(GEOID_Res,Res_Lat,Res_Long) %>%
  unique()

colnames(res_geom)[1] <- c("GEOID")


# Bring Geometries into Data Frame
flow_chart_1 <- left_join(flow_chart_0, res_geom, by = "GEOID")

# flow_chart_1 %>%
#   filter(is.na(Res_Lat) == TRUE)


flow_chart_1$cong_lat <- cong_coords$Cong_Lat
flow_chart_1$cong_long <- cong_coords$Cong_Long


flow_chart <- flow_chart_1

flow_chart$Priced_Pop <- round(flow_chart$Priced_Pop,0)

remove(flow_chart_0)
remove(flow_chart_1)


# Further limit to increase legibility
# quantile(flow_chart$Priced_Pop)

nyc_zone_wrk_ggplot_sub <- flow_chart %>%
  filter(Priced_Pop >= 100)

nyc_zone_wrk_ggplot_sub$FlowCat <- case_when(nyc_zone_wrk_ggplot_sub$Priced_Pop < 200 ~ "100 - 199"
                                             ,nyc_zone_wrk_ggplot_sub$Priced_Pop < 300 ~ "200 - 299"
                                             ,nyc_zone_wrk_ggplot_sub$Priced_Pop < 400 ~ "300 - 399"
                                             ,nyc_zone_wrk_ggplot_sub$Priced_Pop < 500 ~ "400 - 499"
                                             ,nyc_zone_wrk_ggplot_sub$Priced_Pop < 10000 ~ "500+")


# nyc_zone_wrk_ggplot_sub %>%
#   st_set_geometry(NULL) %>%
#   select(FlowCat) %>%
#   unique()


ggplot() +
  geom_sf(data = borough_geom, fill = "white", color = "#003f5c") +
  geom_segment(data=nyc_zone_wrk_ggplot_sub, aes(x = Res_Lat, y = Res_Long, xend = cong_lat, yend = cong_long, col = FlowCat, size = FlowCat)) +
  geom_sf(data = cong_zone, fill = alpha("white",0), color = "#0CF7B5", size = 0.75) +
  labs(title="Priced Worker Flows into Congestion Zone", subtitle = "From Residence Areas With at Least 100 Priced Drivers", col = "Priced Workers", size = "") +
  scale_color_manual(values = c("500+" = "#003f5c"
                                , "400 - 499" = "#58508d"
                                , "300 - 399" = "#bc5090"
                                , "200 - 299" = "#ff6361"
                                , "100 - 199" = "#ffa600")
                    ,labels = c("500+" = "500+"
                                , "400 - 499" = "400 - 499"
                                , "300 - 399" = "300 - 399"
                                , "200 - 299" = "200 - 299"
                                , "100 - 199" = "100 - 199")) + 
  scale_size_manual(values = c("500+" = 1.5
                                 , "400 - 499" = 1.25
                                 , "300 - 399" = 1
                                 , "200 - 299" = 0.75
                                 , "100 - 199" = 0.5)) +
  theme(panel.background = element_rect(fill='grey60', color = "grey60"), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_blank(), axis.text = element_blank(), 
        axis.title = element_blank(), axis.ticks = element_blank())

ggsave("./graphics/Maps/Priced_Flows_Static.png", units = "in", height = 8, width = 12)

remove(nyc_zone_wrk_ggplot_sub)
remove(wrk_flows_drive_discount)

Potentially Priced Populations in New York City’s Congestion Zone - Full Script

Tyler Pearre

10 May 22

Data Wrangling

Congestion Zone

New York City Geometry Codes

Get Census Geometry

Demographic Variables

Commute Data

LODES Data

Bus Routes

Subway Lines

Borough-Level Exploratory Data Analysis

Demographics

Race

Income

Age

Commute

Vehicular Access

Means of Commuting

Travel Times

Automobile

Bus

Exploratory Data Maps

Orientation Map

Transit Access for Non-White Households

Transit Access for Low-Income Households

Transit Access for Zero-Vehicle Households

Earnings by Residence Area - All Workers

Earnings by Residence Area - “Zone” Workers

Earnings by Work Area - All Workers

Map: Flows by Residence Area - “Zone” Workers

Map: Flows by Residence Area - “Zone” Low-Income Workers

Workers Priced by Res Area and…

… Income

… Race

Flow Map