TAI Score Creation

Creating a Transit Accessibility Index (TAI) Score

We are combining data about the quantity and quality of public transit in New York City. While we could have added many other data to our model, we limited the scope to include: Subway (and Staten Island Railroad), Bus, Citibike, and Ferry.

library(tidyverse)

## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(tidycensus)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

library(gt)

acs_tracts_raw <- read_csv('acs_tracts_raw.csv')

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   GEOID = col_double(),
##   NAME = col_character(),
##   total_populationE = col_double(),
##   total_populationM = col_double()
## )

NTA_Census_Conversion <- read_csv('NTA_Census_Conversion.csv')

## Parsed with column specification:
## cols(
##   id2 = col_double(),
##   NTA_Code = col_character(),
##   NTA_Name = col_character()
## )

NTA_Populations <- read_csv('NTA_Population_5yrEstimate_2018.csv')

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   NTA_Code = col_character(),
##   TotalNTAPop = col_double()
## )

NTA_Jobs <-read_csv('NTA_Job_Count.csv')

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   NTACode = col_character(),
##   TotalNTAJobs = col_double()
## )

NTA_Codes <- read_csv('NTA_Codes.csv')

## Parsed with column specification:
## cols(
##   NTA_Code = col_character(),
##   NTA_Name = col_character()
## )

#Ferry data
NYC_Ferry <- read_csv('NYC_Ferry_Locations.csv')

## Parsed with column specification:
## cols(
##   `Ferry Station` = col_character(),
##   lon = col_double(),
##   lat = col_double(),
##   Line = col_character(),
##   NTA_Code = col_character(),
##   NTA_Name = col_character()
## )

#Bus data
NYC_BusShelters <- read_csv('BusShelters_NTA.csv')

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   location = col_character(),
##   segment_id = col_character(),
##   at_between = col_character(),
##   shelter_id = col_character(),
##   boro_name = col_character(),
##   street = col_character(),
##   boroname = col_character(),
##   countyfips = col_character(),
##   ntacode = col_character(),
##   ntaname = col_character()
## )

## See spec(...) for full column specifications.

NYC_Bus <- read_csv('Bus Stops, Routes, NTAs.csv')

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   stop_id = col_double(),
##   stop_lat = col_double(),
##   stop_lon = col_double(),
##   GEOID = col_double(),
##   borocode = col_double(),
##   shape_area = col_double(),
##   shape_leng = col_double(),
##   borocode_2 = col_double(),
##   shape_ar_1 = col_double(),
##   shape_le_1 = col_double()
## )
## See spec(...) for full column specifications.

Bus_Route_Ratings <- read_csv('Bus route ratings.csv')

## Parsed with column specification:
## cols(
##   Line = col_character(),
##   `Report Card` = col_character(),
##   Score = col_character(),
##   rank = col_double(),
##   `\`` = col_logical()
## )

#Subway data
NYC_Subway <- read_csv('Subway Stops with NTAs.csv')

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   `Station ID` = col_double(),
##   `Station La` = col_double(),
##   `Station Lo` = col_double(),
##   Route8 = col_double(),
##   Route9 = col_double(),
##   Route10 = col_logical(),
##   Route11 = col_logical(),
##   ADA = col_logical(),
##   `Free Cross` = col_logical(),
##   `Entrance L` = col_double(),
##   Entrance_1 = col_double(),
##   borocode = col_double(),
##   shape_area = col_double(),
##   shape_leng = col_double()
## )
## See spec(...) for full column specifications.

## Warning: 46 parsing failures.
##  row     col           expected actual                         file
## 1106 Route10 1/0/T/F/TRUE/FALSE      3 'Subway Stops with NTAs.csv'
## 1106 Route11 1/0/T/F/TRUE/FALSE      7 'Subway Stops with NTAs.csv'
## 1107 Route10 1/0/T/F/TRUE/FALSE      3 'Subway Stops with NTAs.csv'
## 1107 Route11 1/0/T/F/TRUE/FALSE      7 'Subway Stops with NTAs.csv'
## 1108 Route10 1/0/T/F/TRUE/FALSE      3 'Subway Stops with NTAs.csv'
## .... ....... .................. ...... ............................
## See problems(...) for more details.

Subway_Train_Addl_Time <-read_csv('Additional Train Time.csv')

## Parsed with column specification:
## cols(
##   month = col_character(),
##   division = col_character(),
##   line = col_character(),
##   period = col_character(),
##   num_passengers = col_double(),
##   `additional train time` = col_double()
## )

Subway_Frequency <- read_csv('Subway Frequencies.csv')

## Parsed with column specification:
## cols(
##   line = col_character(),
##   `TPH - rush hr` = col_double(),
##   `TPH offpeak` = col_double(),
##   `TPH - late nights` = col_double()
## )

SIRR_Score <- read_csv('SIRR_Score.csv')

## Parsed with column specification:
## cols(
##   Station_Name = col_character(),
##   NTA = col_character(),
##   Survey_Rate = col_double(),
##   Survey_Score = col_double(),
##   SIRR_Score = col_double()
## )

#Citibike data
NYC_Citibikes <- read_csv('CB_Stations_NTA.csv')

## Parsed with column specification:
## cols(
##   BoroCode = col_double(),
##   BoroName = col_character(),
##   CountyFIPS = col_character(),
##   NTACode = col_character(),
##   NTAName = col_character(),
##   Shape_Leng = col_double(),
##   Shape_Area = col_double(),
##   NUMPOINTS = col_double()
## )

Spatial Data

We have determined that we want to express our Transit Accessibility Index Scores by Neighborhood Tabulation Area (NTA). Therefore, to build census data into our score and interpretation of the score, we first need to denote which tracts are in each NTA.

NTACensusPop <- acs_tracts_raw %>%
  left_join(NTA_Census_Conversion, by = c("GEOID"="id2")) %>%
  select(GEOID, NAME, total_populationE, NTA_Code, NTA_Name) %>%
  group_by(NTA_Code) %>%
  summarise(TotalNTAPop = sum(total_populationE)) %>%
  ungroup()

## `summarise()` ungrouping output (override with `.groups` argument)

Ferry Score

We calculated Ferry Scores simply, as they do not have an impact on many New Yorkers transportation choices as there are limited start and end points. If an NTA has at least one ferry stop, its ferry score is 1.

Ferry_Scores <- NYC_Ferry %>%
  mutate(ferry_score = 1)

NTA_W_Ferry_Score <-
  left_join(NTA_Codes, Ferry_Scores, by="NTA_Code") %>% 
  select(NTA_Code, NTA_Name.x, ferry_score) 

#Read NA as 0
NTA_W_Ferry_Score[is.na(NTA_W_Ferry_Score)] = 0 

Cleaned_NTA_W_Ferry_Score <- NTA_W_Ferry_Score %>% 
  group_by(NTA_Code) %>% 
  summarize(ferry_score=sum(ferry_score)) %>%
  mutate_if(is.numeric, ~1 * (. != 0))

## `summarise()` ungrouping output (override with `.groups` argument)

Bus Score

To create a bus score, we will add the number of bus shelters to the sum of bus stop scores in each NTA. Bus stop scores range from 0-5, based on the data collected by the Bus Turnaround Project, including: speed, reliability, and frequency.

BusShelter_Score <- NYC_BusShelters %>%
  mutate(busshelter_count = 1) %>% 
  group_by(ntacode) %>%
  summarize(bus_shelters=sum(busshelter_count)) %>%
  mutate(
    busshelter_multiplier = 1,
    bus_shelter_score = bus_shelters* busshelter_multiplier) %>%
  select(ntacode, bus_shelter_score)

## `summarise()` ungrouping output (override with `.groups` argument)

#Join shelter scores with existing Master NTA list with ferry scores
NTA_W_Ferry_BusShelter_Score <- left_join(Cleaned_NTA_W_Ferry_Score, BusShelter_Score, by = c("NTA_Code"="ntacode"))

#Now adding bus route scores to that same Master list

Bus_Route_Score <- left_join(NYC_Bus, Bus_Route_Ratings, by = c("route_id"="Line")) %>% 
  select(route_id, ntacode, rank) %>%
  #These scores were unrated by the Bus Turnaround Project. They are part time, or unlisted buses. 
  replace_na(list(rank = 0)) %>% 
  arrange(desc(ntacode))%>%
#A few of the bus routes didn't join to NTAs because these stops are not within the 5 boroughs of NYC, so I'm removing those rows. 
  na.omit() %>%
  group_by(ntacode) %>%
  summarize(bus_route_score=sum(rank))

## `summarise()` ungrouping output (override with `.groups` argument)

NTA_W_Ferry_Bus_Score <- left_join(NTA_W_Ferry_BusShelter_Score, Bus_Route_Score, by = c("NTA_Code" = "ntacode")) %>% 
  mutate(bus_score = bus_shelter_score + bus_route_score) %>% 
  select(NTA_Code, bus_score, ferry_score)

Subway Score

The way the subway score is calculated results in it being weighted heaviest of all as the subway accounts for roughly 70% of public transit ridership.

While ADA accessible subway stations are critical for many New Yorkers, for the majority of the population, that does not factor into their transportation options, so we did not include that in our index. At the bottom of this analysis, we calculate an ADA-only score.

Cleaned_NYC_Subway <- NYC_Subway %>%
  select(`Station ID`, Route1, Route2, Route3, Route4, Route5, Route6, Route7, Route8, Route9, Route10, Route11, ADA, ntacode) %>% 
mutate(Route8 = as.character(Route8),
        Route9 = as.character(Route9),
        Route10 = as.character(Route10),
        Route11 = as.character(Route11)
      )

Subway_Line_Ratings <- Subway_Train_Addl_Time %>%
  group_by(line, period) %>% 
  summarize(Avg_Addl_Train_Time = mean(`additional train time`)) %>% 
  pivot_wider(
    names_from = period,
    names_sep = ".",
    values_from = Avg_Addl_Train_Time
  )

## `summarise()` regrouping output by 'line' (override with `.groups` argument)

#Separating out the JZ and S lines because they appear differently in the route data and we need to join these later. I started by making a dataframe with the lines mirroring the other subway csv's lines and copying the offpeak peak averages into the new dataframe. Then I'll bind the new dataframe to the bottom of the original list so it's ready to join. 

JZS_Subway_Line_Ratings <- tibble(
  line = c('J','Z', 'FS', 'GS', 'H'),
  offpeak = c(1.60704545, 1.60704545, 0.70318182, 0.43651515, 0.2940909),
  peak =c(1.75886364, 1.75886364, 0.71318182, 0.46515152, 0.1620455),
  )
  
JGS_Final_Subway_Line_Ratings <-Subway_Line_Ratings %>% 
  bind_rows(JZS_Subway_Line_Ratings) %>% 
  mutate(Line_Score = ((offpeak*.76)+ (peak*.24))) #In one week, 40 hours (24%) are peak and 128 hours (76%) are offpeak. 

#Cleaning up the other line ratings data: 
SSubway_Frequencies.df <- tibble(
line = c('FS', 'GS', 'H'),
tph_rush_hr = c(6, 20, 5),
tph_offpeak = c(6, 12, 6),
tph_late_nights = c(3, NA, 3)
)

Cleaned_Subway_Frequency <- Subway_Frequency %>% 
  clean_names()

S_Subway_Frequency <-Cleaned_Subway_Frequency %>% 
  bind_rows(SSubway_Frequencies.df)

S_Subway_Frequency[is.na(S_Subway_Frequency)] = 0 

Subway_Frequency_Score <- S_Subway_Frequency %>% 
  mutate(train_frequency_score=(tph_rush_hr + tph_offpeak + tph_late_nights))

#Now merging together two different ratings data to create final ratings for each of the lines
Total_Subway_Score <- left_join(Subway_Frequency_Score,JGS_Final_Subway_Line_Ratings, by = "line") %>% 
  filter(!line %in% c("S Rock", "S 42nd", "S Fkln")) %>% 
  mutate(subway_score=train_frequency_score+Line_Score) %>% 
  select(line,subway_score)

#Merging the rating to the data frame with the subways by NTA. 
NYC_Subway_Scores1 <- left_join(Cleaned_NYC_Subway, Total_Subway_Score, by = c( "Route1" = "line"))
NYC_Subway_Scores2 <-
left_join(NYC_Subway_Scores1, Total_Subway_Score, by = c( "Route2" = "line"))
NYC_Subway_Scores3 <-
left_join(NYC_Subway_Scores2, Total_Subway_Score, by = c( "Route3" = "line"))
NYC_Subway_Scores4 <-
left_join(NYC_Subway_Scores3, Total_Subway_Score, by = c( "Route4" = "line"))
NYC_Subway_Scores5 <-
left_join(NYC_Subway_Scores4, Total_Subway_Score, by = c( "Route5" = "line"))
NYC_Subway_Scores6 <-
left_join(NYC_Subway_Scores5, Total_Subway_Score, by = c( "Route6" = "line"))
NYC_Subway_Scores7 <-
left_join(NYC_Subway_Scores6, Total_Subway_Score, by = c( "Route7" = "line")) 
NYC_Subway_Scores8 <-
left_join(NYC_Subway_Scores7, Total_Subway_Score, by = c( "Route8" = "line"))  
NYC_Subway_Scores9 <-
left_join(NYC_Subway_Scores8, Total_Subway_Score, by = c( "Route9" = "line"))     

Cleaned_Subway_Score <- NYC_Subway_Scores9 %>% 
  clean_names() 

Cleaned_Subway_Score[c("subway_score_x", "subway_score_y", "subway_score_x_x", "subway_score_y_y", "subway_score_x_x_x", "subway_score_y_y_y", "subway_score_x_x_x_x", "subway_score_y_y_y_y", "subway_score")][is.na(Cleaned_Subway_Score[c("subway_score_x", "subway_score_y", "subway_score_x_x", "subway_score_y_y", "subway_score_x_x_x", "subway_score_y_y_y", "subway_score_x_x_x_x", "subway_score_y_y_y_y", "subway_score")])] <- 0

Final_Subway_Score <- Cleaned_Subway_Score %>% 
mutate(Summed_Subway_Score = subway_score_x + subway_score_y + subway_score_x_x + subway_score_y_y + subway_score_x_x_x + subway_score_y_y_y + subway_score_x_x_x_x + subway_score_y_y_y_y + subway_score) %>% 
  group_by(ntacode) %>% 
  summarize(station_subway_score = sum(Summed_Subway_Score))

## `summarise()` ungrouping output (override with `.groups` argument)

#Add subway score to Master Score sheet
NTA_W_Ferry_Bus_Subway_Score <- left_join(NTA_W_Ferry_Bus_Score, Final_Subway_Score, by = c("NTA_Code" = "ntacode"))
NTA_W_Ferry_Bus_Subway_Score["station_subway_score"][is.na(NTA_W_Ferry_Bus_Subway_Score["station_subway_score"])] <- 0

#Add SIRR Scores
Grouped_SIRR_Score <- SIRR_Score %>% 
  group_by(NTA) %>% 
  summarize(total_sirr_score = sum(SIRR_Score))

## `summarise()` ungrouping output (override with `.groups` argument)

NTA_W_Ferry_Bus_Subway_SIRR_Score <-left_join(NTA_W_Ferry_Bus_Subway_Score, Grouped_SIRR_Score, by = c("NTA_Code" = "NTA")) 

NTA_W_Ferry_Bus_Subway_SIRR_Score["total_sirr_score"][is.na(NTA_W_Ferry_Bus_Subway_SIRR_Score["total_sirr_score"])] <- 0

Citibike Score

Adding the Citibike Score, which is calculated by summing the number of Citibike stations per NTA.

NTA_W_Ferry_Bus_Subway_Citibike_Score <- left_join(NTA_W_Ferry_Bus_Subway_SIRR_Score, NYC_Citibikes, by = c("NTA_Code" = "NTACode")) %>% 
  mutate(citibike_score = NUMPOINTS) %>% 
  select(citibike_score, ferry_score, station_subway_score, total_sirr_score, bus_score, NTA_Code)

Final Scores!

NTA_size <- left_join(NTA_Populations, NTA_Jobs, by = c("NTA_Code" = "NTACode")) %>% 
  mutate(people_in_NTA = TotalNTAJobs + TotalNTAPop) %>% 
  select(people_in_NTA, NTA_Code)

TAI_Score <- left_join(NTA_W_Ferry_Bus_Subway_Citibike_Score, NTA_size, by = "NTA_Code") %>% 
  mutate(raw_TAI_score = ferry_score + bus_score + station_subway_score + total_sirr_score + citibike_score,
         TAI_score_1 = (raw_TAI_score * 1000) / people_in_NTA,
         subwaysirr_score = total_sirr_score + station_subway_score) %>% 
    adorn_rounding(digits = 0, rounding = "half up") %>% 
  #Omit the two remaining NAs: Rikers Island and LGA airport
  na.omit()

Tables and Maps

Before analyzing the scores, we will remove the NTAs that are parks and cemeteries (NTAs ending with 99 and 98), which are the outliers in our scores.

mapped_NTAs <- TAI_Score %>% 
  filter(NTA_Code %in% c("MN40", "MN32", "MN31", "MN33", "BK63", "BK60", "BK91", "BK96", "BK58")) %>% 
  select(!c(total_sirr_score, station_subway_score)) 
mapped_NTAs[,c(which(colnames(mapped_NTAs)=="NTA_Code"),which(colnames(mapped_NTAs)!="NTA_Code"))]

## # A tibble: 9 x 8
##   NTA_Code citibike_score ferry_score bus_score people_in_NTA raw_TAI_score
##   <chr>             <dbl>       <dbl>     <dbl>         <dbl>         <dbl>
## 1 BK58                  0           0       267         84399           267
## 2 BK60                  4           0       215         84987           975
## 3 BK63                  5           0        90         46701           796
## 4 BK91                  0           0       105         63539           253
## 5 BK96                  0           0       179         63141           179
## 6 MN31                 17           1       155        142020           200
## 7 MN32                 14           1       103         99238           118
## 8 MN33                 17           0        99         92807           266
## 9 MN40                 15           0        81        123064          2715
## # … with 2 more variables: TAI_score_1 <dbl>, subwaysirr_score <dbl>

write.csv(mapped_NTAs, 'mapped_NTAs.csv')

Categorizing the scores:

 Summarized_TAI <- TAI_Score %>% 
  #mutate(score_category = TAI_score_1) %>% 
  mutate(score_cat = ifelse(TAI_score_1<5, "Low", ifelse(TAI_score_1<9,"Below Average", ifelse(TAI_score_1<15,"Average", ifelse(TAI_score_1<23,"Above Average", ifelse(TAI_score_1<35,"Strong", "Other")))))
         ) %>% 
  mutate(borough=substr(NTA_Code, start = 1, stop = 2)) %>% 
  filter(score_cat!="Other") 

graphed_scores <- ggplot(data = Summarized_TAI, aes(score_cat)) +
  geom_bar(stat="count", width = .75) +
  theme_minimal() + 
  labs(x=NULL, y= NULL, title = "Breakdown of Neighborhood Tabulation Areas in NYC by TAI Score") +
  scale_x_discrete(limits=c("Low", "Below Average", "Average", "Above Average", "Strong")) +
  theme(plot.title = element_text(hjust = 0.5)) +
 scale_y_continuous(limits=c(0,90))

graphed_scores

Breakdown of the worst scores:

#Where are the very "low" and "below average" scores 
graphed_boro <- Summarized_TAI %>% 
  filter(score_cat %in% c("Low", "Below Average"))
 
table_weak <- graphed_boro %>% 
  group_by(borough) %>% 
  summarize (Number = n()) %>% 
  mutate(percent_of_weak_score = (Number/121)*100) %>% 
  adorn_rounding(digits = 0, rounding = "half up")

## `summarise()` ungrouping output (override with `.groups` argument)

#Total = 121 Low and Below Average Scores
# BK: 30(25%)
#BX 24 (20%)
# MN: 13(11%)
#QN: 46 (38%)
#SI: 8 (7%)
  
 ggplot(table_weak, aes(x="", y=Number, fill=borough)) +
 theme(axis.text.x=element_blank()) +
   geom_bar(stat="identity", width=1, color = "white") +
  coord_polar("y", start=0) +
   theme_void() +
   labs(x = NULL, y = NULL, fill = NULL, title = "Breakdown of 'Low' and 'Below Average' NTAs") +
   scale_fill_manual (name = "Legend", labels = c("Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island"), values=c("lightcyan3", "indianred", "rosybrown2", "lightsteelblue", "slategray4", "#999999")) +
   theme(plot.title = element_text(hjust = 0.5)) +
   geom_text(aes(label = paste(round(Number/ sum(Number) * 100, 0), "%"), x = 1.3),
            position = position_stack(vjust = 0.5))

#Avg score per borough
Table_Mean <- Summarized_TAI %>% 
  group_by(borough) %>% 
  summarize(mean_score = mean(TAI_score_1))

## `summarise()` ungrouping output (override with `.groups` argument)

Table_Median <- Summarized_TAI %>% 
  group_by(borough) %>% 
  summarize(median_score = median(TAI_score_1))

## `summarise()` ungrouping output (override with `.groups` argument)

Table_boros <- left_join(Table_Mean, Table_Median, by = "borough") %>% 
  adorn_rounding(digits = 0, rounding = "half up") %>% 
  pivot_longer(!borough, names_to = "type", values_to = "score")

ggplot(data=Table_boros, aes(x=borough, y=score, fill=type)) +
theme_minimal() +
geom_bar(stat="identity", position="dodge", color="black") +
labs(x= NULL, y= "Score", title= "Score Breakdown by Borough") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(name = "Legend", labels = c("Mean Score", "Median Score"), values=c("skyblue3", "slategray")) +
scale_x_discrete(limits=c("MN","SI","BK","BX","QN"), labels=c("MN"="Manhattan", "SI"="Staten Island", "BK"="Brooklyn", "BX"="Bronx", "QN"="Queens"))

Top 5 table:

Top_5 <- Summarized_TAI %>% 
  arrange(desc(TAI_score_1)) %>% 
  select(NTA_Code, TAI_score_1, bus_score, subwaysirr_score, ferry_score, citibike_score) %>% 
  head() %>% 
  gt(rowname_col = "NTA_Code") %>% 
  tab_header(
    title = md("**Top Five TAI Scores**")) %>% 
  tab_spanner(label =md("*Components of TAI Score*"), columns = vars(bus_score, subwaysirr_score, ferry_score, citibike_score)) %>% 
  cols_label(NTA_Code = "NTA", TAI_score_1 = md("**TAI Score**"), bus_score = "Bus Score", subwaysirr_score = "Subway Score", ferry_score="Ferry Score", citibike_score="Citibike Score")
Top_5

Top Five TAI Scores

	TAI Score	Components of TAI Score
	TAI Score	Bus Score	Subway Score	Ferry Score	Citibike Score
BX63	32	89	1402	0	19
MN23	32	99	4745	0	28
BK38	27	232	3725	1	26
QN61	27	415	1489	0	0
BK09	26	28	997	1	12
MN13	22	144	6798	0	38

ADA-Specific Dataset

This map of public transit access is meaningless for disabled New Yorkers. While many do not meet ADA standards, all ferrys and MTA buses are required to be ADA-accessible and many feature an on-boarding ramp. Only certain subway stations are ADA accessible and Citibikes are removed from the score.

ADA_Subway_Score <- Cleaned_Subway_Score %>% 
  filter(ada == "TRUE") %>% #Filter to only ADA accessible stations.
mutate(Summed_Subway_Score = subway_score_x + subway_score_y + subway_score_x_x + subway_score_y_y + subway_score_x_x_x + subway_score_y_y_y + subway_score_x_x_x_x + subway_score_y_y_y_y + subway_score) %>% 
  group_by(ntacode) %>% 
  summarize(station_subway_score = sum(Summed_Subway_Score))

## `summarise()` ungrouping output (override with `.groups` argument)

NTA_W_Ferry_Bus_ADASubway_Score <- left_join(NTA_W_Ferry_Bus_Score, ADA_Subway_Score, by = c("NTA_Code" = "ntacode"))
NTA_W_Ferry_Bus_ADASubway_Score["station_subway_score"][is.na(NTA_W_Ferry_Bus_ADASubway_Score["station_subway_score"])] <- 0

ADA_Grouped_SIRR_Score <- SIRR_Score %>% 
  filter(Station_Name %in% c("St. George", "Tottenville", "Dongan Hills", "Great Kills")) %>% 
  group_by(NTA) %>% 
  summarize(total_sirr_score = sum(SIRR_Score))

## `summarise()` ungrouping output (override with `.groups` argument)

#Only the Dongan Hills, St. George, Great Kills, and Tottenville stations are ADA compliant. These stations have been renovated with elevators and/or ramps. 
#(Source: https://en.wikipedia.org/wiki/Staten_Island_Railway#:~:text=Only%20the%20Dongan%20Hills%2C%20St,have%20elevators%20and%2For%20ramps)

NTA_W_Ferry_Bus_ADASubway_SIRR_Score <-left_join(NTA_W_Ferry_Bus_ADASubway_Score, ADA_Grouped_SIRR_Score, by = c("NTA_Code" = "NTA")) 

NTA_W_Ferry_Bus_ADASubway_SIRR_Score["total_sirr_score"][is.na(NTA_W_Ferry_Bus_ADASubway_SIRR_Score["total_sirr_score"])] <- 0

ADA_TAI_Score <- left_join(NTA_W_Ferry_Bus_ADASubway_SIRR_Score, NTA_size, by = "NTA_Code") %>% 
  mutate(raw_TAI_score = ferry_score + bus_score + station_subway_score + total_sirr_score,
         TAI_score_1 = (raw_TAI_score * 1000) / people_in_NTA,
         subwaysirr_score = total_sirr_score + station_subway_score) %>% 
    adorn_rounding(digits = 0, rounding = "half up") %>% 
  #Omit the two remaining NAs: Rikers Island and LGA airport
  na.omit() %>% 
  filter(!NTA_Code %in% c("SI99", "QN99", "BX99", "BK99", "MN99"))

write.csv(ADA_TAI_Score,'ADA_TAI_Score.csv')

Where are ADA-compliant public transit stations in NYC?

ADA_Graph_SIRR <- ADA_Grouped_SIRR_Score %>% 
  select(NTA)

ADA_Graph_Subway <- ADA_Subway_Score %>% 
  select(ntacode) %>% 
  rename(NTA=ntacode)

ADA_Graph <- rbind(ADA_Graph_SIRR, ADA_Graph_Subway) %>% 
    mutate(borough=substr(NTA, start = 1, stop = 2)) %>% 
  group_by(borough) %>% 
  summarise(Number_ADA=n())

## `summarise()` ungrouping output (override with `.groups` argument)

ggplot(data=ADA_Graph, aes(x="", y=Number_ADA, fill=borough)) +
 theme(axis.text.x=element_blank()) +
   geom_bar(stat="identity", width=1, color = "white") +
  coord_polar("y", start=0) +
   theme_void() +
   labs(x = NULL, y = NULL, fill = NULL, title = "Number of ADA Compliant Subway Stations") + 
  scale_fill_manual (name = "Legend", labels = c("Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island"), values=c("lightcyan3", "indianred", "rosybrown2", "lightsteelblue", "slategray4", "#999999")) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_text(aes(label = paste(round(Number_ADA, 0)), x = 1.3), position = position_stack(vjust = 0.5))