Loading Data

INSERT DESCRIPTION OF DATA HERE

mta2020 <- read.csv("https://raw.githubusercontent.com/okhaimova/DATA698/main/MTA_Daily_Ridership_Data__Beginning_2020.csv")
gas <- read.csv("https://raw.githubusercontent.com/okhaimova/DATA698/main/NYC%20Weekly%20Retail%20Gasoline%20and%20Diesel.csv",
                skip = 2, header = TRUE)
bicycle_count <- read.csv("Bicycle_Counts.csv")
counters <- read.csv("Bicycle_Counters.csv")

Cleaning the Data

MTA Ridership Beginning 2020

DESCRIPTION

mta2020 <- mta2020 %>%
  set_colnames(c("Date", "Subway", "Subway_Pre", "Buses", "Buses_Pre", "LIRR", "LIRR_Pre", 
                 "MetroNorth","MetroNorth_Pre", "Access-A-Ride", "Access-A-Ride_Pre", 
                 "Bridges_and_Tunnels", "Bridges_and_Tunnels_Pre")) %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%
  arrange(Date) 

NYC Weekly Retail Gasoline and Diesel

DESCRIPTION

gas <- gas %>%
  dplyr::select(1:9) %>%
  select(contains(c("Date","City"))) %>%
  set_colnames(c("Date", "All_Grades", "Regular", "Midgrade", "Premium")) %>%
  mutate(Date = as.Date(Date, format = "%b %d, %Y")) %>%
  na.omit()

Bicycle Count

DESCRIPTION -date to get rid of times to sum by date -explain filtering -join from counter data -comprehensive brooklyn bridge, comp manhattan, pulaski, ed koch, williamsburg - The Brooklyn Bridge data seemed to have an outlier on February 8, 2022 with a reading of 68,200, even though February 2022 had a daily average of 1348.2 bicycles per day. We chose to remove the outlier and impute it with the daily average of February.

# these are the ids that match the five bridge counters
bridge_ids <- c("100010018", "100062893", "100009428", "300020904", "100009427")

counters <- counters %>%
  mutate(id = as.character(id))

bicycle_count <- bicycle_count %>%
  mutate(date = as.Date(date, "%m/%d/%Y")) %>%
  group_by(id, date) %>%
  summarise(total = sum(counts)) %>%
  filter(id %in% bridge_ids) %>%
  mutate(id = as.character(id)) %>%
  left_join(counters, by = 'id') %>%
  select(c(date, id, name, total)) %>%
  filter(date > "2014-12-31")

brooklyn_febraury_avg <- bicycle_count %>%
  filter(id == "300020904",
         date >= "2022-02-01" & date < "2022-03-01",
         date != "2022-02-08") %>%
  summarise(mean = mean(total)) %>%
  select(mean) %>%
  as.numeric()

# replace the outlier
bicycle_count <- bicycle_count %>%
  mutate(total = replace(total, total == 68200, round(brooklyn_febraury_avg,0)))

Combining Weekly

mta_weekly <- mta2020 %>%
  filter(Date > "2020-03-01") %>%
  mutate(Date = yearweek(Date)) %>%
  group_by(Date) %>%
  summarise(Subway = sum(Subway),
            Buses = sum(Buses),
            LIRR = sum(LIRR),
            MetroNorth = sum(MetroNorth),
            `Access-A-Ride` = sum(`Access-A-Ride`),
            Bridges_and_Tunnels = sum(Bridges_and_Tunnels)) %>%
  mutate(MTA = rowSums(across(c(Subway, Buses))))
  
gas_weekly <- gas %>%
  filter(Date > "2020-03-01") %>%
  mutate(Date = yearweek(Date))

bike_weekly <- bicycle_count %>%
  filter(date > "2020-03-01") %>%
  mutate(date = yearweek(date)) %>%
  group_by(date) %>%
  summarise(Bikes = sum(total)) %>%
  rename(Date = date) 


weekly <- plyr::join_all(list(mta_weekly, bike_weekly, gas_weekly), 
              by = "Date", type = 'left')

Data Exploration

Gas

The price of gasoline was decreasing in the beginning of 2020, up until late February. Gasoline prices started increasing as the World Health Organization (WHO) declared the novel Coronavirus (COVID-19) outbreak as a pandemic on March 11, 2020.

It continued increasing, reaching the ultimate peak in June 2022.

Some factors that affected the gasoline prices throughout the last few years were COVID-19 related supply disruptions, the Russian-Ukrainian conflict, and the Keystone XL pipeline cancellation.

The pattern in gasoline prices are similar across the different grades of gasoline.

gas %>%
  filter(Date >= "2020-01-01") %>%
  ggplot(aes(x = Date, y = All_Grades)) +
  geom_line() +
  labs(title = "Price of Gasoline for All Grades, NYC",
       y = "$USD")

gas %>%
  filter(Date >= "2020-01-01") %>%
  pivot_longer(-Date) %>%
  filter(name != "All_Grades") %>%
  ggplot(aes(x = Date, y = value, colour = name)) +
  geom_line() +
  facet_grid(factor(name, levels =c("Regular", "Midgrade", "Premium")) ~., scales = "free_y") +
  labs(title = "Price of Gasoline, NYC",
       y = "$USD",
       color = "Gasoline Grade")

MTA

DESCRIPTION -repeat weekly for other methods -describe the charts -repeat for bridges and tunnels -fix scientific notation

mta2020 %>%
  dplyr::select(Date, Subway, Buses) %>%
  pivot_longer(-Date) %>%
  ggplot(aes(x = Date, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y") +
  labs(title = "MTA Ridership",
       y = "Count",
       color = "Mode")

mta2020 %>%
  dplyr::select(Date, LIRR, MetroNorth, "Access-A-Ride") %>%
  pivot_longer(-Date) %>%
  ggplot(aes(x = Date, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y") +
  labs(title = "MTA Ridership",
       y = "Count",
       color = "Mode")
## Warning: Removed 62 row(s) containing missing values (geom_path).

mta2020 %>%
  mutate(Date = yearweek(Date)) %>%
  group_by(Date) %>%
  summarise(Subway = sum(Subway)) %>%
  as_tsibble(index = Date) %>%
  autoplot(Subway) +
  labs(title = "Weekly Subway Ridership",
       y = "Count")

mta2020 %>%
  as_tsibble(index = Date) %>%
  model(STL(Subway ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  labs(title = "STL Decomposition")

Bicycle

-rename the labels

The five bridges seems to have a similar seasonality pattern. There is a peak of bicycle usage in the summer and a decline in the winter. Brooklyn Bridge, Manhattan Bridge, and Williamsburg Bridge all connect Brooklyn and Manhattan. The Pulaski Bridge connects Brooklyn and Queens. The Ed Koch Queensboro Bridge connects Queens and Manhattan. It is also the only bridge that seems to have an apparent increase in usage from 2020 to the present.

bicycle_count %>%
  ggplot(aes(x = date, y = total, color = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y")

#dcmp <- bicycle_count %>%
#  as_tsibble(index = date, key = c(id, name)) %>%
#  fill_gaps() %>%
#  model(stl = STL(total))

weekly %>%
  as_tsibble(index = Date) %>%
  model(STL(Bikes ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  labs(title = "STL Decomposition")

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = MTA), color = "steelblue") +
  geom_line(aes(y = All_Grades * 10000000), color = "darkred") +
  scale_y_continuous(name = "MTA Subway & Bus Ridership",
                     sec.axis = sec_axis(~./10000000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = Buses), color = "steelblue") +
  geom_line(aes(y = All_Grades * 5000000), color = "darkred") +
  scale_y_continuous(name = "MTA Bus Ridership",
                     sec.axis = sec_axis(~./5000000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = Subway), color = "steelblue") +
  geom_line(aes(y = All_Grades * 5000000), color = "darkred") +
  scale_y_continuous(name = "MTA Subway Ridership",
                     sec.axis = sec_axis(~./5000000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = Bikes), color = "steelblue") +
  geom_line(aes(y = All_Grades * 100000), color = "darkred") +
  scale_y_continuous(name = "Bike Ridership",
                     sec.axis = sec_axis(~./100000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = LIRR), color = "steelblue") +
  geom_line(aes(y = All_Grades * 500000), color = "darkred") +
  scale_y_continuous(name = "LIRR Ridership",
                     sec.axis = sec_axis(~./500000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")
## Warning: Removed 5 row(s) containing missing values (geom_path).

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = MetroNorth), color = "steelblue") +
  geom_line(aes(y = All_Grades * 500000), color = "darkred") +
  scale_y_continuous(name = "MetroNorth Ridership",
                     sec.axis = sec_axis(~./500000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")
## Warning: Removed 5 row(s) containing missing values (geom_path).

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = Bridges_and_Tunnels), color = "steelblue") +
  geom_line(aes(y = All_Grades * 1000000), color = "darkred") +
  scale_y_continuous(name = "Traffic",
                     sec.axis = sec_axis(~./1000000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")

weekly %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = `Access-A-Ride`), color = "steelblue") +
  geom_line(aes(y = All_Grades * 50000), color = "darkred") +
  scale_y_continuous(name = "Traffic",
                     sec.axis = sec_axis(~./50000, name = "Price of Gasoline")) +
  labs(title = "Price of Gasoline for All Grades, NYC")

-back to work January 21 -omicron end of 21

weekly %>%
  as_tsibble(index = Date) %>%
  model(classical_decomposition(Subway, type = "additive")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical Additive Decomposition of Subway Ridership")
## Warning: Removed 26 row(s) containing missing values (geom_path).

lambda <- weekly %>%
  as_tsibble(index = Date) %>%
  features(Subway, features = guerrero) %>%
  pull(lambda_guerrero)

#stl decomp applied to the box cox transformed data
weekly %>%
  as_tsibble(index = Date) %>%
  model(STL(box_cox(Subway,lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  ggtitle("STL with Box-Cox")

weekly %>%
  as_tsibble(index = Date) %>%
  gg_tsdisplay(difference(Subway), plot_type='partial') +
  labs(title = "hi")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

Modeling

Subway Linear

model1<- lm(Subway ~ All_Grades, weekly)

summary(model1)
## 
## Call:
## lm(formula = Subway ~ All_Grades, data = weekly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -6500633 -1706335  -250356  1491752 21217419 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3005632    1166011  -2.578    0.011 *  
## All_Grades   5453226     354684  15.375   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3560000 on 143 degrees of freedom
## Multiple R-squared:  0.6231, Adjusted R-squared:  0.6204 
## F-statistic: 236.4 on 1 and 143 DF,  p-value: < 2.2e-16

Subway Linear w/ Log Gas

model2 <- lm(Subway ~ log(All_Grades), weekly)

summary(model2)
## 
## Call:
## lm(formula = Subway ~ log(All_Grades), data = weekly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5842698 -1769687  -545468  1809551 21186135 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5960240    1213512  -4.912 2.44e-06 ***
## log(All_Grades) 18076735    1052659  17.172  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3313000 on 143 degrees of freedom
## Multiple R-squared:  0.6734, Adjusted R-squared:  0.6712 
## F-statistic: 294.9 on 1 and 143 DF,  p-value: < 2.2e-16
model2a<- lm(Subway ~ log(Regular) + log(Midgrade) + log(Premium), weekly)

summary(model2a)
## 
## Call:
## lm(formula = Subway ~ log(Regular) + log(Midgrade) + log(Premium), 
##     data = weekly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5460247 -2093682  -233210  1387776 22069845 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -1324492    4466350  -0.297  0.76725   
## log(Regular)    58415600   19912453   2.934  0.00391 **
## log(Midgrade) -129765537   42229867  -3.073  0.00255 **
## log(Premium)    85825572   26970956   3.182  0.00180 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3222000 on 141 degrees of freedom
## Multiple R-squared:  0.6954, Adjusted R-squared:  0.6889 
## F-statistic: 107.3 on 3 and 141 DF,  p-value: < 2.2e-16

MTA Linear w Log Gas

model3 <- lm(MTA ~ log(All_Grades), weekly)

summary(model3)
## 
## Call:
## lm(formula = MTA ~ log(All_Grades), data = weekly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -9147120 -2386471  -357666  2245694 27741690 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -2658363    1624774  -1.636    0.104    
## log(All_Grades) 21601961    1409408  15.327   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4436000 on 143 degrees of freedom
## Multiple R-squared:  0.6216, Adjusted R-squared:  0.619 
## F-statistic: 234.9 on 1 and 143 DF,  p-value: < 2.2e-16
model3a<- lm(MTA ~ log(Regular) + log(Midgrade) + log(Premium), weekly)

summary(model3a)
## 
## Call:
## lm(formula = MTA ~ log(Regular) + log(Midgrade) + log(Premium), 
##     data = weekly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7931138 -2764487  -282822  2081546 28740439 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      3848972    6036809   0.638  0.52478   
## log(Regular)    71764365   26914073   2.666  0.00856 **
## log(Midgrade) -151227525   57078741  -2.649  0.00898 **
## log(Premium)    96459509   36454488   2.646  0.00907 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4356000 on 141 degrees of freedom
## Multiple R-squared:  0.6403, Adjusted R-squared:  0.6327 
## F-statistic: 83.68 on 3 and 141 DF,  p-value: < 2.2e-16

Bus Linear

model4<- lm(Buses ~ All_Grades, weekly)

summary(model4)
## 
## Call:
## lm(formula = Buses ~ All_Grades, data = weekly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3434353  -581950    45629   571264  6560564 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3883422     422815   9.185 4.32e-16 ***
## All_Grades   1061775     128614   8.256 9.14e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1291000 on 143 degrees of freedom
## Multiple R-squared:  0.3228, Adjusted R-squared:  0.318 
## F-statistic: 68.15 on 1 and 143 DF,  p-value: 9.136e-14

Log Bus Linear w/ Log Gas

model5 <- lm(log(Buses) ~ log(All_Grades), weekly)

summary(model5)
## 
## Call:
## lm(formula = log(Buses) ~ log(All_Grades), data = weekly)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74008 -0.07283  0.01487  0.11235  0.72958 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     15.13670    0.07369  205.42  < 2e-16 ***
## log(All_Grades)  0.56439    0.06392    8.83  3.4e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2012 on 143 degrees of freedom
## Multiple R-squared:  0.3528, Adjusted R-squared:  0.3483 
## F-statistic: 77.97 on 1 and 143 DF,  p-value: 3.402e-15

LIRR Linear

model6<- lm(LIRR ~ All_Grades, weekly)

summary(model6)
## 
## Call:
## lm(formula = LIRR ~ All_Grades, data = weekly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -340309 -102596   -1990  107723  375142 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -300636      48838  -6.156 7.64e-09 ***
## All_Grades    311894      14734  21.168  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 145500 on 138 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.7645, Adjusted R-squared:  0.7628 
## F-statistic: 448.1 on 1 and 138 DF,  p-value: < 2.2e-16
model6a<- lm(LIRR ~ Regular + Midgrade + Premium, weekly)

summary(model6a)
## 
## Call:
## lm(formula = LIRR ~ Regular + Midgrade + Premium, data = weekly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -330374 -103784   12247  105803  231467 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -264278     101162  -2.612  0.01000 *  
## Regular       653681     346045   1.889  0.06102 .  
## Midgrade    -2036904     634659  -3.209  0.00166 ** 
## Premium      1630259     328994   4.955 2.11e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 130800 on 136 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8124, Adjusted R-squared:  0.8083 
## F-statistic: 196.3 on 3 and 136 DF,  p-value: < 2.2e-16

LIRR Linear w/ Log Gas

model7 <- lm(LIRR ~ log(All_Grades), weekly)

summary(model7)
## 
## Call:
## lm(formula = LIRR ~ log(All_Grades), data = weekly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -308005  -97987  -12208  104345  342203 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -450624      50553  -8.914 2.62e-15 ***
## log(All_Grades)  1016695      43526  23.358  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 134800 on 138 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.7981, Adjusted R-squared:  0.7967 
## F-statistic: 545.6 on 1 and 138 DF,  p-value: < 2.2e-16

MetroNorth Linear

model8<- lm(MetroNorth ~ Regular + Midgrade + Premium, weekly)

summary(model8)
## 
## Call:
## lm(formula = MetroNorth ~ Regular + Midgrade + Premium, data = weekly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -308891  -82475    7096   86803  231638 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -395226      87398  -4.522 1.33e-05 ***
## Regular       272938     298882   0.913  0.36277    
## Midgrade    -1544542     548093  -2.818  0.00556 ** 
## Premium      1494163     284182   5.258 5.56e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 113000 on 135 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.8362, Adjusted R-squared:  0.8326 
## F-statistic: 229.7 on 3 and 135 DF,  p-value: < 2.2e-16

MetroNorth Linear w/ Log Gas

model9 <- lm(MetroNorth ~ log(All_Grades), weekly)

summary(model9)
## 
## Call:
## lm(formula = MetroNorth ~ log(All_Grades), data = weekly)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -219177  -83177  -18060   71372  347781 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -465334      46473  -10.01   <2e-16 ***
## log(All_Grades)   937441      40017   23.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 123900 on 137 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.8002, Adjusted R-squared:  0.7988 
## F-statistic: 548.8 on 1 and 137 DF,  p-value: < 2.2e-16

Bike Linear

model10<- lm(Bikes ~ All_Grades, weekly)

summary(model10)
## 
## Call:
## lm(formula = Bikes ~ All_Grades, data = weekly)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -98947 -37539   9184  37377  82968 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   101684      15288   6.651 5.72e-10 ***
## All_Grades     11013       4650   2.368   0.0192 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46670 on 143 degrees of freedom
## Multiple R-squared:  0.03774,    Adjusted R-squared:  0.03101 
## F-statistic: 5.608 on 1 and 143 DF,  p-value: 0.01921

Bike Linear w/ Log Gas

model11a<- lm(Bikes ~ log(Regular) + log(Midgrade) + log(Premium), weekly)

summary(model11a)
## 
## Call:
## lm(formula = Bikes ~ log(Regular) + log(Midgrade) + log(Premium), 
##     data = weekly)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -94997 -36014  10448  32755  82895 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -97063      63046  -1.540  0.12591   
## log(Regular)   -878671     281079  -3.126  0.00215 **
## log(Midgrade)  1317391     596106   2.210  0.02872 * 
## log(Premium)   -316000     380715  -0.830  0.40793   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45490 on 141 degrees of freedom
## Multiple R-squared:  0.0987, Adjusted R-squared:  0.07953 
## F-statistic: 5.147 on 3 and 141 DF,  p-value: 0.002087

Subway Linear w/ Gas Trend Season

“An alternative to using seasonal dummy variables, especially for long seasonal periods, is to use Fourier terms.

If m is the seasonal period

With Fourier terms, we often need fewer predictors than with dummy variables, especially when
m is large. This makes them useful for weekly data, for example, where m is 52 ”

weekly_ts <- weekly %>%
  as_tsibble(index = Date)

model_subway <- weekly_ts %>% 
  model(TSLM(Subway ~ log(All_Grades) + trend() + fourier(K = 13)))

model_subway %>%
  report()
## Series: Subway 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -6441451 -1083652  -139580  1210269 15878276 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -8596036    2186556  -3.931 0.000144 ***
## log(All_Grades)       21970549    3192232   6.883 3.19e-10 ***
## trend()                 -24419      20431  -1.195 0.234448    
## fourier(K = 13)C1_52   2435610     436703   5.577 1.62e-07 ***
## fourier(K = 13)S1_52    371308     333058   1.115 0.267221    
## fourier(K = 13)C2_52    873333     320661   2.724 0.007457 ** 
## fourier(K = 13)S2_52   -888498     314427  -2.826 0.005556 ** 
## fourier(K = 13)C3_52    988170     324421   3.046 0.002872 ** 
## fourier(K = 13)S3_52   -146845     318396  -0.461 0.645517    
## fourier(K = 13)C4_52   -257150     319913  -0.804 0.423150    
## fourier(K = 13)S4_52    412583     318455   1.296 0.197694    
## fourier(K = 13)C5_52   -417787     321647  -1.299 0.196554    
## fourier(K = 13)S5_52   -567000     317463  -1.786 0.076707 .  
## fourier(K = 13)C6_52    191356     316929   0.604 0.547167    
## fourier(K = 13)S6_52    147405     319413   0.461 0.645314    
## fourier(K = 13)C7_52   -628585     316465  -1.986 0.049361 *  
## fourier(K = 13)S7_52    118517     316402   0.375 0.708659    
## fourier(K = 13)C8_52    517017     317199   1.630 0.105826    
## fourier(K = 13)S8_52   -278258     315925  -0.881 0.380262    
## fourier(K = 13)C9_52    372167     315804   1.178 0.241019    
## fourier(K = 13)S9_52    438178     317315   1.381 0.169967    
## fourier(K = 13)C10_52  -275831     316698  -0.871 0.385576    
## fourier(K = 13)S10_52   172021     317296   0.542 0.588757    
## fourier(K = 13)C11_52   410817     317312   1.295 0.198003    
## fourier(K = 13)S11_52   -59139     314933  -0.188 0.851376    
## fourier(K = 13)C12_52  -244216     315173  -0.775 0.439997    
## fourier(K = 13)S12_52   317385     315522   1.006 0.316555    
## fourier(K = 13)C13_52  -514290     313260  -1.642 0.103353    
## fourier(K = 13)S13_52  -287869     315513  -0.912 0.363458    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2659000 on 116 degrees of freedom
## Multiple R-squared: 0.8293,  Adjusted R-squared: 0.7881
## F-statistic: 20.13 on 28 and 116 DF, p-value: < 2.22e-16

Buses Linear w/ Gas Trend Season

model_buses <- weekly_ts %>%
  model(TSLM(Buses ~ log(All_Grades) + trend() + fourier(K = 13))) 

model_buses %>%
  report()
## Series: Buses 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2555289  -611400    23943   628375  4566947 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            1814916     904396   2.007 0.047100 *  
## log(All_Grades)        5855225    1320359   4.435 2.11e-05 ***
## trend()                 -16099       8450  -1.905 0.059250 .  
## fourier(K = 13)C1_52    725307     180627   4.015 0.000106 ***
## fourier(K = 13)S1_52   -360876     137758  -2.620 0.009979 ** 
## fourier(K = 13)C2_52    118382     132631   0.893 0.373937    
## fourier(K = 13)S2_52   -349839     130052  -2.690 0.008200 ** 
## fourier(K = 13)C3_52    436003     134186   3.249 0.001514 ** 
## fourier(K = 13)S3_52     10802     131694   0.082 0.934772    
## fourier(K = 13)C4_52    -38218     132321  -0.289 0.773226    
## fourier(K = 13)S4_52    197964     131718   1.503 0.135573    
## fourier(K = 13)C5_52    -52804     133038  -0.397 0.692167    
## fourier(K = 13)S5_52   -238280     131308  -1.815 0.072159 .  
## fourier(K = 13)C6_52     49594     131087   0.378 0.705881    
## fourier(K = 13)S6_52     66475     132115   0.503 0.615806    
## fourier(K = 13)C7_52   -288908     130895  -2.207 0.029271 *  
## fourier(K = 13)S7_52      5136     130869   0.039 0.968760    
## fourier(K = 13)C8_52    177146     131199   1.350 0.179577    
## fourier(K = 13)S8_52    -88336     130672  -0.676 0.500377    
## fourier(K = 13)C9_52    124004     130622   0.949 0.344422    
## fourier(K = 13)S9_52    161555     131247   1.231 0.220840    
## fourier(K = 13)C10_52   -57028     130992  -0.435 0.664111    
## fourier(K = 13)S10_52    94051     131239   0.717 0.475039    
## fourier(K = 13)C11_52   166945     131246   1.272 0.205917    
## fourier(K = 13)S11_52    -5866     130262  -0.045 0.964160    
## fourier(K = 13)C12_52  -133921     130361  -1.027 0.306411    
## fourier(K = 13)S12_52    53579     130505   0.411 0.682158    
## fourier(K = 13)C13_52  -169675     129569  -1.310 0.192942    
## fourier(K = 13)S13_52  -163281     130501  -1.251 0.213385    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1100000 on 116 degrees of freedom
## Multiple R-squared: 0.601,   Adjusted R-squared: 0.5047
## F-statistic: 6.241 on 28 and 116 DF, p-value: 6.4776e-13

LIRR Linear w/ Gas Trend Season

model_LIRR <- weekly_ts %>%
  model(TSLM(LIRR ~ log(All_Grades) + trend() + fourier(K = 13))) 

model_LIRR %>%
  report()
## Series: LIRR 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -262752  -33553    8992   37419  122348 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -205648.06   54901.56  -3.746 0.000287 ***
## log(All_Grades)        590505.35   81606.78   7.236 6.36e-11 ***
## trend()                  3005.79     532.59   5.644 1.29e-07 ***
## fourier(K = 13)C1_52    96741.37   10805.16   8.953 9.11e-15 ***
## fourier(K = 13)S1_52   -73107.97    8411.17  -8.692 3.60e-14 ***
## fourier(K = 13)C2_52    38882.02    7898.89   4.922 2.99e-06 ***
## fourier(K = 13)S2_52   -13197.36    8001.25  -1.649 0.101890    
## fourier(K = 13)C3_52    25910.97    8045.66   3.220 0.001679 ** 
## fourier(K = 13)S3_52     5174.01    7968.43   0.649 0.517477    
## fourier(K = 13)C4_52   -13264.00    7991.57  -1.660 0.099788 .  
## fourier(K = 13)S4_52     3037.37    7927.61   0.383 0.702350    
## fourier(K = 13)C5_52    -9942.31    8019.91  -1.240 0.217700    
## fourier(K = 13)S5_52     3264.50    7926.41   0.412 0.681243    
## fourier(K = 13)C6_52    -7024.73    7953.88  -0.883 0.379046    
## fourier(K = 13)S6_52       21.84    7938.63   0.003 0.997810    
## fourier(K = 13)C7_52    -3398.20    7961.77  -0.427 0.670342    
## fourier(K = 13)S7_52     -798.02    7900.94  -0.101 0.919730    
## fourier(K = 13)C8_52     6486.69    7908.91   0.820 0.413874    
## fourier(K = 13)S8_52     4946.19    7943.85   0.623 0.534795    
## fourier(K = 13)C9_52     -981.28    7882.42  -0.124 0.901152    
## fourier(K = 13)S9_52    -6457.10    7958.78  -0.811 0.418919    
## fourier(K = 13)C10_52   -4023.83    7922.99  -0.508 0.612554    
## fourier(K = 13)S10_52    6511.31    7933.81   0.821 0.413574    
## fourier(K = 13)C11_52    1083.13    7934.94   0.137 0.891672    
## fourier(K = 13)S11_52    4921.85    7899.55   0.623 0.534527    
## fourier(K = 13)C12_52   -3151.81    7911.84  -0.398 0.691126    
## fourier(K = 13)S12_52  -10000.04    7897.50  -1.266 0.208082    
## fourier(K = 13)C13_52   -1406.23    7863.45  -0.179 0.858396    
## fourier(K = 13)S13_52    -815.53    7857.71  -0.104 0.917525    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 65180 on 111 degrees of freedom
## Multiple R-squared: 0.962,   Adjusted R-squared: 0.9524
## F-statistic: 100.4 on 28 and 111 DF, p-value: < 2.22e-16

MetroNorth Linear w/ Gas Trend Season

model_metronorth <- weekly_ts %>%
  model(TSLM(MetroNorth ~ log(All_Grades) + trend() + fourier(K = 13))) 

model_metronorth %>%
  report()
## Series: MetroNorth 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -229462  -33600    1265   28719   98358 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -120950.1    44942.6  -2.691  0.00823 ** 
## log(All_Grades)        349232.5    66846.6   5.224 8.36e-07 ***
## trend()                  4115.9      436.2   9.436 7.67e-16 ***
## fourier(K = 13)C1_52    56954.9     8870.9   6.420 3.55e-09 ***
## fourier(K = 13)S1_52   -70309.6     6891.3 -10.203  < 2e-16 ***
## fourier(K = 13)C2_52    57165.4     6458.2   8.852 1.66e-14 ***
## fourier(K = 13)S2_52   -11997.7     6576.6  -1.824  0.07082 .  
## fourier(K = 13)C3_52    27537.7     6622.1   4.158 6.37e-05 ***
## fourier(K = 13)S3_52     1502.9     6515.5   0.231  0.81801    
## fourier(K = 13)C4_52    -1704.4     6550.1  -0.260  0.79518    
## fourier(K = 13)S4_52    -6097.4     6498.5  -0.938  0.35016    
## fourier(K = 13)C5_52   -10471.5     6553.2  -1.598  0.11293    
## fourier(K = 13)S5_52     -687.8     6519.9  -0.105  0.91618    
## fourier(K = 13)C6_52    -6583.2     6533.1  -1.008  0.31582    
## fourier(K = 13)S6_52    -2618.9     6494.0  -0.403  0.68752    
## fourier(K = 13)C7_52    -2573.9     6537.2  -0.394  0.69454    
## fourier(K = 13)S7_52      237.2     6465.2   0.037  0.97080    
## fourier(K = 13)C8_52     8729.0     6463.1   1.351  0.17960    
## fourier(K = 13)S8_52     4662.0     6535.8   0.713  0.47717    
## fourier(K = 13)C9_52     2192.6     6465.6   0.339  0.73517    
## fourier(K = 13)S9_52    -2036.5     6522.3  -0.312  0.75545    
## fourier(K = 13)C10_52   -2735.5     6512.7  -0.420  0.67529    
## fourier(K = 13)S10_52    1689.3     6485.9   0.260  0.79499    
## fourier(K = 13)C11_52    7651.7     6489.9   1.179  0.24093    
## fourier(K = 13)S11_52    3168.5     6494.2   0.488  0.62660    
## fourier(K = 13)C12_52   -7470.8     6479.9  -1.153  0.25144    
## fourier(K = 13)S12_52   -9016.5     6481.3  -1.391  0.16698    
## fourier(K = 13)C13_52   -6570.7     6472.8  -1.015  0.31227    
## fourier(K = 13)S13_52   -4050.9     6420.7  -0.631  0.52941    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 53260 on 110 degrees of freedom
## Multiple R-squared: 0.9703,  Adjusted R-squared: 0.9628
## F-statistic: 128.6 on 28 and 110 DF, p-value: < 2.22e-16

Bikes Linear w/ Gas Trend Season

-no trend in bikes, but big seasonality factor

model_bikes <- weekly_ts %>%
  model(TSLM(Bikes ~ All_Grades + fourier(K = 13))) 

model_bikes %>%
  report()
## Series: Bikes 
## Model: TSLM 
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -49204 -13645   1114  14096  38322 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           111508.06    6862.47  16.249  < 2e-16 ***
## All_Grades              6383.19    2095.60   3.046  0.00287 ** 
## fourier(K = 13)C1_52    7239.23    2383.01   3.038  0.00294 ** 
## fourier(K = 13)S1_52  -59914.07    2559.95 -23.404  < 2e-16 ***
## fourier(K = 13)C2_52   12048.64    2481.47   4.855 3.75e-06 ***
## fourier(K = 13)S2_52   -3981.63    2445.33  -1.628  0.10616    
## fourier(K = 13)C3_52    4480.24    2487.67   1.801  0.07428 .  
## fourier(K = 13)S3_52   -5275.39    2447.11  -2.156  0.03315 *  
## fourier(K = 13)C4_52   -4101.43    2451.48  -1.673  0.09699 .  
## fourier(K = 13)S4_52     276.45    2475.79   0.112  0.91128    
## fourier(K = 13)C5_52    2846.03    2460.11   1.157  0.24968    
## fourier(K = 13)S5_52   -2104.75    2464.17  -0.854  0.39477    
## fourier(K = 13)C6_52    -350.44    2463.83  -0.142  0.88714    
## fourier(K = 13)S6_52    3320.21    2459.92   1.350  0.17971    
## fourier(K = 13)C7_52   -4113.51    2461.63  -1.671  0.09738 .  
## fourier(K = 13)S7_52    3068.13    2460.84   1.247  0.21497    
## fourier(K = 13)C8_52    3914.60    2464.93   1.588  0.11496    
## fourier(K = 13)S8_52   -1198.94    2456.97  -0.488  0.62648    
## fourier(K = 13)C9_52    3520.93    2456.47   1.433  0.15443    
## fourier(K = 13)S9_52    1727.70    2465.93   0.701  0.48493    
## fourier(K = 13)C10_52     13.56    2456.08   0.006  0.99560    
## fourier(K = 13)S10_52   2553.64    2465.11   1.036  0.30238    
## fourier(K = 13)C11_52   2548.59    2468.03   1.033  0.30390    
## fourier(K = 13)S11_52   3403.84    2448.34   1.390  0.16709    
## fourier(K = 13)C12_52  -6508.25    2451.59  -2.655  0.00904 ** 
## fourier(K = 13)S12_52  -2220.57    2454.26  -0.905  0.36744    
## fourier(K = 13)C13_52  -2792.50    2436.67  -1.146  0.25412    
## fourier(K = 13)S13_52    310.19    2453.73   0.126  0.89962    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20690 on 117 degrees of freedom
## Multiple R-squared: 0.8453,  Adjusted R-squared: 0.8096
## F-statistic: 23.68 on 27 and 117 DF, p-value: < 2.22e-16

Comparisons

Linear

weekly %>%
  select(-c(All_Grades, Regular, Midgrade, Premium, Date)) %>%
  map(~lm(.x ~ All_Grades, data = weekly)) %>%
  map(glance) %>%
  do.call(rbind.data.frame, .) %>%
  rownames_to_column() 
## # A tibble: 8 x 13
##   rowname r.squared adj.r.squared  sigma statistic  p.value    df logLik   AIC
##   <chr>       <dbl>         <dbl>  <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl>
## 1 Subway     0.623         0.620  3.56e6    236.   4.23e-32     1 -2392. 4790.
## 2 Buses      0.323         0.318  1.29e6     68.2  9.14e-14     1 -2245. 4496.
## 3 LIRR       0.765         0.763  1.46e5    448.   3.55e-45     1 -1862. 3730.
## 4 MetroN~    0.772         0.770  1.32e5    464.   8.28e-46     1 -1835. 3677.
## 5 Access~    0.376         0.372  2.32e4     85.7  2.96e-16     1 -1651. 3308.
## 6 Bridge~    0.431         0.427  7.91e5    108.   3.12e-19     1 -2174. 4354.
## 7 MTA        0.575         0.572  4.70e6    193.   2.42e-28     1 -2432. 4871.
## 8 Bikes      0.0377        0.0310 4.67e4      5.61 1.92e- 2     1 -1764. 3533.
## # ... with 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>,
## #   nobs <int>
weekly %>%
  select(-c(All_Grades, Regular, Midgrade, Premium, Date)) %>%
  map(~lm(.x ~ All_Grades, data = weekly)) %>% 
  map(glance) %>% 
  do.call(rbind.data.frame, .) %>% 
  rownames_to_column %>% 
  as_tibble %>% 
  ggplot(aes(x = reorder(rowname, -r.squared), y = r.squared)) +
  geom_col() +
  coord_flip() + 
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Mode of Transportation",
       y = expression( R^{2}),
       title = expression(paste("Comparison of ", R^{2})))

Linear Breakdown by Gas Type

weekly %>%
  select(-c(All_Grades, Regular, Midgrade, Premium, Date)) %>%
  map(~lm(.x ~ Regular + Midgrade + Premium, data = weekly)) %>%
  map(glance) %>%
  do.call(rbind.data.frame, .) %>%
  rownames_to_column() 
## # A tibble: 8 x 13
##   rowname r.squared adj.r.squared  sigma statistic  p.value    df logLik   AIC
##   <chr>       <dbl>         <dbl>  <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl>
## 1 Subway     0.643         0.635  3.49e6     84.6  2.28e-31     3 -2388. 4786.
## 2 Buses      0.329         0.315  1.29e6     23.0  3.44e-12     3 -2244. 4499.
## 3 LIRR       0.812         0.808  1.31e5    196.   3.18e-49     3 -1846. 3702.
## 4 MetroN~    0.836         0.833  1.13e5    230.   7.89e-53     3 -1812. 3635.
## 5 Access~    0.516         0.505  2.06e4     49.7  6.23e-22     3 -1633. 3275.
## 6 Bridge~    0.478         0.467  7.63e5     43.1  8.08e-20     3 -2168. 4345.
## 7 MTA        0.591         0.582  4.64e6     67.9  3.13e-27     3 -2430. 4869.
## 8 Bikes      0.0908        0.0715 4.57e4      4.69 3.73e- 3     3 -1760. 3529.
## # ... with 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>,
## #   nobs <int>
weekly %>%
  select(-c(All_Grades, Regular, Midgrade, Premium, Date)) %>%
  map(~lm(.x ~ Regular + Midgrade + Premium, data = weekly)) %>% 
  map(glance) %>% 
  do.call(rbind.data.frame, .) %>% 
  rownames_to_column %>% 
  as_tibble %>% 
  ggplot(aes(x = reorder(rowname, -adj.r.squared), y = adj.r.squared)) +
  geom_col() +
  coord_flip() + 
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Mode of Transportation",
       y = expression(paste("Adjusted ", R^{2})),
       title = expression(paste("Comparison of Adjusted ", R^{2})))

Linear-Log

weekly %>%
  select(-c(All_Grades, Regular, Midgrade, Premium, Date)) %>%
  map(~lm(.x ~ log(All_Grades), data = weekly)) %>%
  map(glance) %>%
  do.call(rbind.data.frame, .) %>%
  rownames_to_column() 
## # A tibble: 8 x 13
##   rowname r.squared adj.r.squared  sigma statistic  p.value    df logLik   AIC
##   <chr>       <dbl>         <dbl>  <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl>
## 1 Subway     0.673         0.671  3.31e6    295.   1.43e-36     1 -2382. 4769.
## 2 Buses      0.350         0.345  1.26e6     77.0  4.69e-15     1 -2242. 4490.
## 3 LIRR       0.798         0.797  1.35e5    546.   8.50e-50     1 -1851. 3708.
## 4 MetroN~    0.800         0.799  1.24e5    549.   9.26e-50     1 -1826. 3659.
## 5 Access~    0.414         0.409  2.25e4    100.   3.58e-18     1 -1646. 3299.
## 6 Bridge~    0.466         0.463  7.66e5    125.   2.99e-21     1 -2169. 4345.
## 7 MTA        0.622         0.619  4.44e6    235.   5.59e-32     1 -2424. 4854.
## 8 Bikes      0.0277        0.0209 4.69e4      4.08 4.53e- 2     1 -1764. 3535.
## # ... with 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>,
## #   nobs <int>
weekly %>%
  select(-c(All_Grades, Regular, Midgrade, Premium, Date)) %>%
  map(~lm(.x ~ log(All_Grades), data = weekly)) %>% 
  map(glance) %>% 
  do.call(rbind.data.frame, .) %>% 
  rownames_to_column %>% 
  as_tibble %>% 
  ggplot(aes(x = reorder(rowname, -adj.r.squared), y = adj.r.squared)) +
  geom_col() +
  coord_flip() + 
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Mode of Transportation",
       y = expression(paste("Adjusted ", R^{2})),
       title = expression(paste("Comparison of Adjusted ", R^{2})))

Linear with Trend and Seasonality

model_aar <- weekly_ts %>% model(TSLM(`Access-A-Ride` ~ All_Grades + fourier(K = 13))) 

model_bat <- weekly_ts %>% model(TSLM(Bridges_and_Tunnels ~ log(All_Grades) + fourier(K = 13))) 

model_mta <- weekly_ts %>% model(TSLM(MTA ~ log(All_Grades) + trend() + fourier(K = 13))) 

names <- c("Subway", "Buses", "LIRR", "MetroNorth", "Bikes", "Access-A-Ride",
           "Bridges_and_Tunnels", "MTA")

trend_models <- 
  rbind(glance(model_subway),
        glance(model_buses),
        glance(model_LIRR),
        glance(model_metronorth),
        glance(model_bikes),
        glance(model_aar),
        glance(model_bat),
        glance(model_mta)) %>%
  select(.model:adj_r_squared) %>%
  cbind(names, .)


trend_models %>% select(.model:adj_r_squared)
##                                                           .model r_squared
## 1     TSLM(Subway ~ log(All_Grades) + trend() + fourier(K = 13)) 0.8293352
## 2      TSLM(Buses ~ log(All_Grades) + trend() + fourier(K = 13)) 0.6010397
## 3       TSLM(LIRR ~ log(All_Grades) + trend() + fourier(K = 13)) 0.9620075
## 4 TSLM(MetroNorth ~ log(All_Grades) + trend() + fourier(K = 13)) 0.9703479
## 5                     TSLM(Bikes ~ All_Grades + fourier(K = 13)) 0.8452933
## 6           TSLM(`Access-A-Ride` ~ All_Grades + fourier(K = 13)) 0.5976953
## 7  TSLM(Bridges_and_Tunnels ~ log(All_Grades) + fourier(K = 13)) 0.6789031
## 8        TSLM(MTA ~ log(All_Grades) + trend() + fourier(K = 13)) 0.7915920
##   adj_r_squared
## 1     0.7881403
## 2     0.5047390
## 3     0.9524238
## 4     0.9628001
## 5     0.8095918
## 6     0.5040555
## 7     0.6048038
## 8     0.7412866
trend_models %>% 
  as_tibble %>% 
  ggplot(aes(x = reorder(names, -adj_r_squared), y = adj_r_squared)) +
  geom_col() +
  coord_flip() + 
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Mode of Transportation",
       y = expression(paste("Adjusted ", R^{2})),
       title = expression(paste("Comparison of Adjusted ", R^{2})))