Rows to spread out:

INDICATOR_NAME Mean Distance Between Failures - Subways Total Ridership - Subways Customer Injury Rate - Subways Elevator Availability - Subways Escalator Availability - Subways On-Time Performance (Terminal)

Columns we care about:

INDICATOR_NAME [4] PERIOD_YEAR [11] PERIOD_MONTH [12] MONTHLY_ACTUAL [16] Subway Wait Assessment

subway <- subway[, c(4, 11, 12, 16)]

mdf <- subway %>%
          filter(INDICATOR_NAME == "Mean Distance Between Failures - Subways") %>%
          spread(INDICATOR_NAME, MONTHLY_ACTUAL)
tr <- subway %>%
          filter(INDICATOR_NAME == "Total Ridership - Subways") %>%
          spread(INDICATOR_NAME, MONTHLY_ACTUAL)
cir <- subway %>%
          filter(INDICATOR_NAME == "Customer Injury Rate - Subways") %>%
          spread(INDICATOR_NAME, MONTHLY_ACTUAL)
elev <- subway %>%
          filter(INDICATOR_NAME == "Elevator Availability - Subways") %>%
          spread(INDICATOR_NAME, MONTHLY_ACTUAL)
esc <- subway %>%
          filter(INDICATOR_NAME == "Escalator Availability - Subways") %>%
          spread(INDICATOR_NAME, MONTHLY_ACTUAL)
otp <- subway %>%
          filter(INDICATOR_NAME == "On-Time Performance (Terminal)") %>%
          spread(INDICATOR_NAME, MONTHLY_ACTUAL)

final.subway <- left_join(mdf, tr, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, cir, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, elev, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, esc, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, otp, by = c("PERIOD_YEAR", "PERIOD_MONTH"))

# Focus on OTP
final.subway <- final.subway[-c(1:17),]
colnames(final.subway) <- c("YEAR", "MONTH", "FAILURE", "RIDERSHIP", "INJURY", "ELEV", "ESCA", "OTP")
plot(final.subway$YEAR, final.subway$RIDERSHIP)

plot(final.subway$YEAR, final.subway$OTP)

# Model
model <- lm(OTP ~ FAILURE + RIDERSHIP + INJURY + ELEV + ESCA,
            data = final.subway)

summary(model)
## 
## Call:
## lm(formula = OTP ~ FAILURE + RIDERSHIP + INJURY + ELEV + ESCA, 
##     data = final.subway)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2324  -3.5214  -0.0817   2.9602  11.8279 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.735e+02  7.285e+01  -2.382   0.0192 *  
## FAILURE      1.582e-04  1.890e-05   8.369 5.34e-13 ***
## RIDERSHIP   -3.371e-07  7.612e-08  -4.428 2.56e-05 ***
## INJURY      -1.728e+00  1.603e+00  -1.078   0.2837    
## ELEV         3.470e+00  7.784e-01   4.457 2.29e-05 ***
## ESCA        -5.904e-01  4.241e-01  -1.392   0.1671    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.122 on 94 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.7036, Adjusted R-squared:  0.6878 
## F-statistic: 44.62 on 5 and 94 DF,  p-value: < 2.2e-16
qqnorm(model$residuals)
qqline(model$residuals)