We are combining data about the quantity and quality of public transit in New York City. While we could have added many other data to our model, we limited the scope to include: Subway (and Staten Island Railroad), Bus, Citibike, and Ferry.
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidycensus)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(gt)
acs_tracts_raw <- read_csv('acs_tracts_raw.csv')
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## GEOID = col_double(),
## NAME = col_character(),
## total_populationE = col_double(),
## total_populationM = col_double()
## )
NTA_Census_Conversion <- read_csv('NTA_Census_Conversion.csv')
## Parsed with column specification:
## cols(
## id2 = col_double(),
## NTA_Code = col_character(),
## NTA_Name = col_character()
## )
NTA_Populations <- read_csv('NTA_Population_5yrEstimate_2018.csv')
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## NTA_Code = col_character(),
## TotalNTAPop = col_double()
## )
NTA_Jobs <-read_csv('NTA_Job_Count.csv')
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## NTACode = col_character(),
## TotalNTAJobs = col_double()
## )
NTA_Codes <- read_csv('NTA_Codes.csv')
## Parsed with column specification:
## cols(
## NTA_Code = col_character(),
## NTA_Name = col_character()
## )
#Ferry data
NYC_Ferry <- read_csv('NYC_Ferry_Locations.csv')
## Parsed with column specification:
## cols(
## `Ferry Station` = col_character(),
## lon = col_double(),
## lat = col_double(),
## Line = col_character(),
## NTA_Code = col_character(),
## NTA_Name = col_character()
## )
#Bus data
NYC_BusShelters <- read_csv('BusShelters_NTA.csv')
## Parsed with column specification:
## cols(
## .default = col_double(),
## location = col_character(),
## segment_id = col_character(),
## at_between = col_character(),
## shelter_id = col_character(),
## boro_name = col_character(),
## street = col_character(),
## boroname = col_character(),
## countyfips = col_character(),
## ntacode = col_character(),
## ntaname = col_character()
## )
## See spec(...) for full column specifications.
NYC_Bus <- read_csv('Bus Stops, Routes, NTAs.csv')
## Parsed with column specification:
## cols(
## .default = col_character(),
## stop_id = col_double(),
## stop_lat = col_double(),
## stop_lon = col_double(),
## GEOID = col_double(),
## borocode = col_double(),
## shape_area = col_double(),
## shape_leng = col_double(),
## borocode_2 = col_double(),
## shape_ar_1 = col_double(),
## shape_le_1 = col_double()
## )
## See spec(...) for full column specifications.
Bus_Route_Ratings <- read_csv('Bus route ratings.csv')
## Parsed with column specification:
## cols(
## Line = col_character(),
## `Report Card` = col_character(),
## Score = col_character(),
## rank = col_double(),
## `\`` = col_logical()
## )
#Subway data
NYC_Subway <- read_csv('Subway Stops with NTAs.csv')
## Parsed with column specification:
## cols(
## .default = col_character(),
## `Station ID` = col_double(),
## `Station La` = col_double(),
## `Station Lo` = col_double(),
## Route8 = col_double(),
## Route9 = col_double(),
## Route10 = col_logical(),
## Route11 = col_logical(),
## ADA = col_logical(),
## `Free Cross` = col_logical(),
## `Entrance L` = col_double(),
## Entrance_1 = col_double(),
## borocode = col_double(),
## shape_area = col_double(),
## shape_leng = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 46 parsing failures.
## row col expected actual file
## 1106 Route10 1/0/T/F/TRUE/FALSE 3 'Subway Stops with NTAs.csv'
## 1106 Route11 1/0/T/F/TRUE/FALSE 7 'Subway Stops with NTAs.csv'
## 1107 Route10 1/0/T/F/TRUE/FALSE 3 'Subway Stops with NTAs.csv'
## 1107 Route11 1/0/T/F/TRUE/FALSE 7 'Subway Stops with NTAs.csv'
## 1108 Route10 1/0/T/F/TRUE/FALSE 3 'Subway Stops with NTAs.csv'
## .... ....... .................. ...... ............................
## See problems(...) for more details.
Subway_Train_Addl_Time <-read_csv('Additional Train Time.csv')
## Parsed with column specification:
## cols(
## month = col_character(),
## division = col_character(),
## line = col_character(),
## period = col_character(),
## num_passengers = col_double(),
## `additional train time` = col_double()
## )
Subway_Frequency <- read_csv('Subway Frequencies.csv')
## Parsed with column specification:
## cols(
## line = col_character(),
## `TPH - rush hr` = col_double(),
## `TPH offpeak` = col_double(),
## `TPH - late nights` = col_double()
## )
SIRR_Score <- read_csv('SIRR_Score.csv')
## Parsed with column specification:
## cols(
## Station_Name = col_character(),
## NTA = col_character(),
## Survey_Rate = col_double(),
## Survey_Score = col_double(),
## SIRR_Score = col_double()
## )
#Citibike data
NYC_Citibikes <- read_csv('CB_Stations_NTA.csv')
## Parsed with column specification:
## cols(
## BoroCode = col_double(),
## BoroName = col_character(),
## CountyFIPS = col_character(),
## NTACode = col_character(),
## NTAName = col_character(),
## Shape_Leng = col_double(),
## Shape_Area = col_double(),
## NUMPOINTS = col_double()
## )
We have determined that we want to express our Transit Accessibility Index Scores by Neighborhood Tabulation Area (NTA). Therefore, to build census data into our score and interpretation of the score, we first need to denote which tracts are in each NTA.
NTACensusPop <- acs_tracts_raw %>%
left_join(NTA_Census_Conversion, by = c("GEOID"="id2")) %>%
select(GEOID, NAME, total_populationE, NTA_Code, NTA_Name) %>%
group_by(NTA_Code) %>%
summarise(TotalNTAPop = sum(total_populationE)) %>%
ungroup()
## `summarise()` ungrouping output (override with `.groups` argument)
We calculated Ferry Scores simply, as they do not have an impact on many New Yorkers transportation choices as there are limited start and end points. If an NTA has at least one ferry stop, its ferry score is 1.
Ferry_Scores <- NYC_Ferry %>%
mutate(ferry_score = 1)
NTA_W_Ferry_Score <-
left_join(NTA_Codes, Ferry_Scores, by="NTA_Code") %>%
select(NTA_Code, NTA_Name.x, ferry_score)
#Read NA as 0
NTA_W_Ferry_Score[is.na(NTA_W_Ferry_Score)] = 0
Cleaned_NTA_W_Ferry_Score <- NTA_W_Ferry_Score %>%
group_by(NTA_Code) %>%
summarize(ferry_score=sum(ferry_score)) %>%
mutate_if(is.numeric, ~1 * (. != 0))
## `summarise()` ungrouping output (override with `.groups` argument)
To create a bus score, we will add the number of bus shelters to the sum of bus stop scores in each NTA. Bus stop scores range from 0-5, based on the data collected by the Bus Turnaround Project, including: speed, reliability, and frequency.
BusShelter_Score <- NYC_BusShelters %>%
mutate(busshelter_count = 1) %>%
group_by(ntacode) %>%
summarize(bus_shelters=sum(busshelter_count)) %>%
mutate(
busshelter_multiplier = 1,
bus_shelter_score = bus_shelters* busshelter_multiplier) %>%
select(ntacode, bus_shelter_score)
## `summarise()` ungrouping output (override with `.groups` argument)
#Join shelter scores with existing Master NTA list with ferry scores
NTA_W_Ferry_BusShelter_Score <- left_join(Cleaned_NTA_W_Ferry_Score, BusShelter_Score, by = c("NTA_Code"="ntacode"))
#Now adding bus route scores to that same Master list
Bus_Route_Score <- left_join(NYC_Bus, Bus_Route_Ratings, by = c("route_id"="Line")) %>%
select(route_id, ntacode, rank) %>%
#These scores were unrated by the Bus Turnaround Project. They are part time, or unlisted buses.
replace_na(list(rank = 0)) %>%
arrange(desc(ntacode))%>%
#A few of the bus routes didn't join to NTAs because these stops are not within the 5 boroughs of NYC, so I'm removing those rows.
na.omit() %>%
group_by(ntacode) %>%
summarize(bus_route_score=sum(rank))
## `summarise()` ungrouping output (override with `.groups` argument)
NTA_W_Ferry_Bus_Score <- left_join(NTA_W_Ferry_BusShelter_Score, Bus_Route_Score, by = c("NTA_Code" = "ntacode")) %>%
mutate(bus_score = bus_shelter_score + bus_route_score) %>%
select(NTA_Code, bus_score, ferry_score)
The way the subway score is calculated results in it being weighted heaviest of all as the subway accounts for roughly 70% of public transit ridership.
While ADA accessible subway stations are critical for many New Yorkers, for the majority of the population, that does not factor into their transportation options, so we did not include that in our index. At the bottom of this analysis, we calculate an ADA-only score.
Cleaned_NYC_Subway <- NYC_Subway %>%
select(`Station ID`, Route1, Route2, Route3, Route4, Route5, Route6, Route7, Route8, Route9, Route10, Route11, ADA, ntacode) %>%
mutate(Route8 = as.character(Route8),
Route9 = as.character(Route9),
Route10 = as.character(Route10),
Route11 = as.character(Route11)
)
Subway_Line_Ratings <- Subway_Train_Addl_Time %>%
group_by(line, period) %>%
summarize(Avg_Addl_Train_Time = mean(`additional train time`)) %>%
pivot_wider(
names_from = period,
names_sep = ".",
values_from = Avg_Addl_Train_Time
)
## `summarise()` regrouping output by 'line' (override with `.groups` argument)
#Separating out the JZ and S lines because they appear differently in the route data and we need to join these later. I started by making a dataframe with the lines mirroring the other subway csv's lines and copying the offpeak peak averages into the new dataframe. Then I'll bind the new dataframe to the bottom of the original list so it's ready to join.
JZS_Subway_Line_Ratings <- tibble(
line = c('J','Z', 'FS', 'GS', 'H'),
offpeak = c(1.60704545, 1.60704545, 0.70318182, 0.43651515, 0.2940909),
peak =c(1.75886364, 1.75886364, 0.71318182, 0.46515152, 0.1620455),
)
JGS_Final_Subway_Line_Ratings <-Subway_Line_Ratings %>%
bind_rows(JZS_Subway_Line_Ratings) %>%
mutate(Line_Score = ((offpeak*.76)+ (peak*.24))) #In one week, 40 hours (24%) are peak and 128 hours (76%) are offpeak.
#Cleaning up the other line ratings data:
SSubway_Frequencies.df <- tibble(
line = c('FS', 'GS', 'H'),
tph_rush_hr = c(6, 20, 5),
tph_offpeak = c(6, 12, 6),
tph_late_nights = c(3, NA, 3)
)
Cleaned_Subway_Frequency <- Subway_Frequency %>%
clean_names()
S_Subway_Frequency <-Cleaned_Subway_Frequency %>%
bind_rows(SSubway_Frequencies.df)
S_Subway_Frequency[is.na(S_Subway_Frequency)] = 0
Subway_Frequency_Score <- S_Subway_Frequency %>%
mutate(train_frequency_score=(tph_rush_hr + tph_offpeak + tph_late_nights))
#Now merging together two different ratings data to create final ratings for each of the lines
Total_Subway_Score <- left_join(Subway_Frequency_Score,JGS_Final_Subway_Line_Ratings, by = "line") %>%
filter(!line %in% c("S Rock", "S 42nd", "S Fkln")) %>%
mutate(subway_score=train_frequency_score+Line_Score) %>%
select(line,subway_score)
#Merging the rating to the data frame with the subways by NTA.
NYC_Subway_Scores1 <- left_join(Cleaned_NYC_Subway, Total_Subway_Score, by = c( "Route1" = "line"))
NYC_Subway_Scores2 <-
left_join(NYC_Subway_Scores1, Total_Subway_Score, by = c( "Route2" = "line"))
NYC_Subway_Scores3 <-
left_join(NYC_Subway_Scores2, Total_Subway_Score, by = c( "Route3" = "line"))
NYC_Subway_Scores4 <-
left_join(NYC_Subway_Scores3, Total_Subway_Score, by = c( "Route4" = "line"))
NYC_Subway_Scores5 <-
left_join(NYC_Subway_Scores4, Total_Subway_Score, by = c( "Route5" = "line"))
NYC_Subway_Scores6 <-
left_join(NYC_Subway_Scores5, Total_Subway_Score, by = c( "Route6" = "line"))
NYC_Subway_Scores7 <-
left_join(NYC_Subway_Scores6, Total_Subway_Score, by = c( "Route7" = "line"))
NYC_Subway_Scores8 <-
left_join(NYC_Subway_Scores7, Total_Subway_Score, by = c( "Route8" = "line"))
NYC_Subway_Scores9 <-
left_join(NYC_Subway_Scores8, Total_Subway_Score, by = c( "Route9" = "line"))
Cleaned_Subway_Score <- NYC_Subway_Scores9 %>%
clean_names()
Cleaned_Subway_Score[c("subway_score_x", "subway_score_y", "subway_score_x_x", "subway_score_y_y", "subway_score_x_x_x", "subway_score_y_y_y", "subway_score_x_x_x_x", "subway_score_y_y_y_y", "subway_score")][is.na(Cleaned_Subway_Score[c("subway_score_x", "subway_score_y", "subway_score_x_x", "subway_score_y_y", "subway_score_x_x_x", "subway_score_y_y_y", "subway_score_x_x_x_x", "subway_score_y_y_y_y", "subway_score")])] <- 0
Final_Subway_Score <- Cleaned_Subway_Score %>%
mutate(Summed_Subway_Score = subway_score_x + subway_score_y + subway_score_x_x + subway_score_y_y + subway_score_x_x_x + subway_score_y_y_y + subway_score_x_x_x_x + subway_score_y_y_y_y + subway_score) %>%
group_by(ntacode) %>%
summarize(station_subway_score = sum(Summed_Subway_Score))
## `summarise()` ungrouping output (override with `.groups` argument)
#Add subway score to Master Score sheet
NTA_W_Ferry_Bus_Subway_Score <- left_join(NTA_W_Ferry_Bus_Score, Final_Subway_Score, by = c("NTA_Code" = "ntacode"))
NTA_W_Ferry_Bus_Subway_Score["station_subway_score"][is.na(NTA_W_Ferry_Bus_Subway_Score["station_subway_score"])] <- 0
#Add SIRR Scores
Grouped_SIRR_Score <- SIRR_Score %>%
group_by(NTA) %>%
summarize(total_sirr_score = sum(SIRR_Score))
## `summarise()` ungrouping output (override with `.groups` argument)
NTA_W_Ferry_Bus_Subway_SIRR_Score <-left_join(NTA_W_Ferry_Bus_Subway_Score, Grouped_SIRR_Score, by = c("NTA_Code" = "NTA"))
NTA_W_Ferry_Bus_Subway_SIRR_Score["total_sirr_score"][is.na(NTA_W_Ferry_Bus_Subway_SIRR_Score["total_sirr_score"])] <- 0
Adding the Citibike Score, which is calculated by summing the number of Citibike stations per NTA.
NTA_W_Ferry_Bus_Subway_Citibike_Score <- left_join(NTA_W_Ferry_Bus_Subway_SIRR_Score, NYC_Citibikes, by = c("NTA_Code" = "NTACode")) %>%
mutate(citibike_score = NUMPOINTS) %>%
select(citibike_score, ferry_score, station_subway_score, total_sirr_score, bus_score, NTA_Code)
NTA_size <- left_join(NTA_Populations, NTA_Jobs, by = c("NTA_Code" = "NTACode")) %>%
mutate(people_in_NTA = TotalNTAJobs + TotalNTAPop) %>%
select(people_in_NTA, NTA_Code)
TAI_Score <- left_join(NTA_W_Ferry_Bus_Subway_Citibike_Score, NTA_size, by = "NTA_Code") %>%
mutate(raw_TAI_score = ferry_score + bus_score + station_subway_score + total_sirr_score + citibike_score,
TAI_score_1 = (raw_TAI_score * 1000) / people_in_NTA,
subwaysirr_score = total_sirr_score + station_subway_score) %>%
adorn_rounding(digits = 0, rounding = "half up") %>%
#Omit the two remaining NAs: Rikers Island and LGA airport
na.omit()
Before analyzing the scores, we will remove the NTAs that are parks and cemeteries (NTAs ending with 99 and 98), which are the outliers in our scores.
mapped_NTAs <- TAI_Score %>%
filter(NTA_Code %in% c("MN40", "MN32", "MN31", "MN33", "BK63", "BK60", "BK91", "BK96", "BK58")) %>%
select(!c(total_sirr_score, station_subway_score))
mapped_NTAs[,c(which(colnames(mapped_NTAs)=="NTA_Code"),which(colnames(mapped_NTAs)!="NTA_Code"))]
## # A tibble: 9 x 8
## NTA_Code citibike_score ferry_score bus_score people_in_NTA raw_TAI_score
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 BK58 0 0 267 84399 267
## 2 BK60 4 0 215 84987 975
## 3 BK63 5 0 90 46701 796
## 4 BK91 0 0 105 63539 253
## 5 BK96 0 0 179 63141 179
## 6 MN31 17 1 155 142020 200
## 7 MN32 14 1 103 99238 118
## 8 MN33 17 0 99 92807 266
## 9 MN40 15 0 81 123064 2715
## # … with 2 more variables: TAI_score_1 <dbl>, subwaysirr_score <dbl>
write.csv(mapped_NTAs, 'mapped_NTAs.csv')
Categorizing the scores:
Summarized_TAI <- TAI_Score %>%
#mutate(score_category = TAI_score_1) %>%
mutate(score_cat = ifelse(TAI_score_1<5, "Low", ifelse(TAI_score_1<9,"Below Average", ifelse(TAI_score_1<15,"Average", ifelse(TAI_score_1<23,"Above Average", ifelse(TAI_score_1<35,"Strong", "Other")))))
) %>%
mutate(borough=substr(NTA_Code, start = 1, stop = 2)) %>%
filter(score_cat!="Other")
graphed_scores <- ggplot(data = Summarized_TAI, aes(score_cat)) +
geom_bar(stat="count", width = .75) +
theme_minimal() +
labs(x=NULL, y= NULL, title = "Breakdown of Neighborhood Tabulation Areas in NYC by TAI Score") +
scale_x_discrete(limits=c("Low", "Below Average", "Average", "Above Average", "Strong")) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_y_continuous(limits=c(0,90))
graphed_scores
Breakdown of the worst scores:
#Where are the very "low" and "below average" scores
graphed_boro <- Summarized_TAI %>%
filter(score_cat %in% c("Low", "Below Average"))
table_weak <- graphed_boro %>%
group_by(borough) %>%
summarize (Number = n()) %>%
mutate(percent_of_weak_score = (Number/121)*100) %>%
adorn_rounding(digits = 0, rounding = "half up")
## `summarise()` ungrouping output (override with `.groups` argument)
#Total = 121 Low and Below Average Scores
# BK: 30(25%)
#BX 24 (20%)
# MN: 13(11%)
#QN: 46 (38%)
#SI: 8 (7%)
ggplot(table_weak, aes(x="", y=Number, fill=borough)) +
theme(axis.text.x=element_blank()) +
geom_bar(stat="identity", width=1, color = "white") +
coord_polar("y", start=0) +
theme_void() +
labs(x = NULL, y = NULL, fill = NULL, title = "Breakdown of 'Low' and 'Below Average' NTAs") +
scale_fill_manual (name = "Legend", labels = c("Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island"), values=c("lightcyan3", "indianred", "rosybrown2", "lightsteelblue", "slategray4", "#999999")) +
theme(plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label = paste(round(Number/ sum(Number) * 100, 0), "%"), x = 1.3),
position = position_stack(vjust = 0.5))
#Avg score per borough
Table_Mean <- Summarized_TAI %>%
group_by(borough) %>%
summarize(mean_score = mean(TAI_score_1))
## `summarise()` ungrouping output (override with `.groups` argument)
Table_Median <- Summarized_TAI %>%
group_by(borough) %>%
summarize(median_score = median(TAI_score_1))
## `summarise()` ungrouping output (override with `.groups` argument)
Table_boros <- left_join(Table_Mean, Table_Median, by = "borough") %>%
adorn_rounding(digits = 0, rounding = "half up") %>%
pivot_longer(!borough, names_to = "type", values_to = "score")
ggplot(data=Table_boros, aes(x=borough, y=score, fill=type)) +
theme_minimal() +
geom_bar(stat="identity", position="dodge", color="black") +
labs(x= NULL, y= "Score", title= "Score Breakdown by Borough") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(name = "Legend", labels = c("Mean Score", "Median Score"), values=c("skyblue3", "slategray")) +
scale_x_discrete(limits=c("MN","SI","BK","BX","QN"), labels=c("MN"="Manhattan", "SI"="Staten Island", "BK"="Brooklyn", "BX"="Bronx", "QN"="Queens"))
Top 5 table:
Top_5 <- Summarized_TAI %>%
arrange(desc(TAI_score_1)) %>%
select(NTA_Code, TAI_score_1, bus_score, subwaysirr_score, ferry_score, citibike_score) %>%
head() %>%
gt(rowname_col = "NTA_Code") %>%
tab_header(
title = md("**Top Five TAI Scores**")) %>%
tab_spanner(label =md("*Components of TAI Score*"), columns = vars(bus_score, subwaysirr_score, ferry_score, citibike_score)) %>%
cols_label(NTA_Code = "NTA", TAI_score_1 = md("**TAI Score**"), bus_score = "Bus Score", subwaysirr_score = "Subway Score", ferry_score="Ferry Score", citibike_score="Citibike Score")
Top_5
| Top Five TAI Scores | |||||
|---|---|---|---|---|---|
| TAI Score | Components of TAI Score | ||||
| Bus Score | Subway Score | Ferry Score | Citibike Score | ||
| BX63 | 32 | 89 | 1402 | 0 | 19 |
| MN23 | 32 | 99 | 4745 | 0 | 28 |
| BK38 | 27 | 232 | 3725 | 1 | 26 |
| QN61 | 27 | 415 | 1489 | 0 | 0 |
| BK09 | 26 | 28 | 997 | 1 | 12 |
| MN13 | 22 | 144 | 6798 | 0 | 38 |
This map of public transit access is meaningless for disabled New Yorkers. While many do not meet ADA standards, all ferrys and MTA buses are required to be ADA-accessible and many feature an on-boarding ramp. Only certain subway stations are ADA accessible and Citibikes are removed from the score.
ADA_Subway_Score <- Cleaned_Subway_Score %>%
filter(ada == "TRUE") %>% #Filter to only ADA accessible stations.
mutate(Summed_Subway_Score = subway_score_x + subway_score_y + subway_score_x_x + subway_score_y_y + subway_score_x_x_x + subway_score_y_y_y + subway_score_x_x_x_x + subway_score_y_y_y_y + subway_score) %>%
group_by(ntacode) %>%
summarize(station_subway_score = sum(Summed_Subway_Score))
## `summarise()` ungrouping output (override with `.groups` argument)
NTA_W_Ferry_Bus_ADASubway_Score <- left_join(NTA_W_Ferry_Bus_Score, ADA_Subway_Score, by = c("NTA_Code" = "ntacode"))
NTA_W_Ferry_Bus_ADASubway_Score["station_subway_score"][is.na(NTA_W_Ferry_Bus_ADASubway_Score["station_subway_score"])] <- 0
ADA_Grouped_SIRR_Score <- SIRR_Score %>%
filter(Station_Name %in% c("St. George", "Tottenville", "Dongan Hills", "Great Kills")) %>%
group_by(NTA) %>%
summarize(total_sirr_score = sum(SIRR_Score))
## `summarise()` ungrouping output (override with `.groups` argument)
#Only the Dongan Hills, St. George, Great Kills, and Tottenville stations are ADA compliant. These stations have been renovated with elevators and/or ramps.
#(Source: https://en.wikipedia.org/wiki/Staten_Island_Railway#:~:text=Only%20the%20Dongan%20Hills%2C%20St,have%20elevators%20and%2For%20ramps)
NTA_W_Ferry_Bus_ADASubway_SIRR_Score <-left_join(NTA_W_Ferry_Bus_ADASubway_Score, ADA_Grouped_SIRR_Score, by = c("NTA_Code" = "NTA"))
NTA_W_Ferry_Bus_ADASubway_SIRR_Score["total_sirr_score"][is.na(NTA_W_Ferry_Bus_ADASubway_SIRR_Score["total_sirr_score"])] <- 0
ADA_TAI_Score <- left_join(NTA_W_Ferry_Bus_ADASubway_SIRR_Score, NTA_size, by = "NTA_Code") %>%
mutate(raw_TAI_score = ferry_score + bus_score + station_subway_score + total_sirr_score,
TAI_score_1 = (raw_TAI_score * 1000) / people_in_NTA,
subwaysirr_score = total_sirr_score + station_subway_score) %>%
adorn_rounding(digits = 0, rounding = "half up") %>%
#Omit the two remaining NAs: Rikers Island and LGA airport
na.omit() %>%
filter(!NTA_Code %in% c("SI99", "QN99", "BX99", "BK99", "MN99"))
write.csv(ADA_TAI_Score,'ADA_TAI_Score.csv')
Where are ADA-compliant public transit stations in NYC?
ADA_Graph_SIRR <- ADA_Grouped_SIRR_Score %>%
select(NTA)
ADA_Graph_Subway <- ADA_Subway_Score %>%
select(ntacode) %>%
rename(NTA=ntacode)
ADA_Graph <- rbind(ADA_Graph_SIRR, ADA_Graph_Subway) %>%
mutate(borough=substr(NTA, start = 1, stop = 2)) %>%
group_by(borough) %>%
summarise(Number_ADA=n())
## `summarise()` ungrouping output (override with `.groups` argument)
ggplot(data=ADA_Graph, aes(x="", y=Number_ADA, fill=borough)) +
theme(axis.text.x=element_blank()) +
geom_bar(stat="identity", width=1, color = "white") +
coord_polar("y", start=0) +
theme_void() +
labs(x = NULL, y = NULL, fill = NULL, title = "Number of ADA Compliant Subway Stations") +
scale_fill_manual (name = "Legend", labels = c("Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island"), values=c("lightcyan3", "indianred", "rosybrown2", "lightsteelblue", "slategray4", "#999999")) +
theme(plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label = paste(round(Number_ADA, 0)), x = 1.3), position = position_stack(vjust = 0.5))