Rows to spread out:
INDICATOR_NAME Mean Distance Between Failures - Subways Total Ridership - Subways Customer Injury Rate - Subways Elevator Availability - Subways Escalator Availability - Subways On-Time Performance (Terminal)
Columns we care about:
INDICATOR_NAME [4] PERIOD_YEAR [11] PERIOD_MONTH [12] MONTHLY_ACTUAL [16] Subway Wait Assessment
subway <- subway[, c(4, 11, 12, 16)]
mdf <- subway %>%
filter(INDICATOR_NAME == "Mean Distance Between Failures - Subways") %>%
spread(INDICATOR_NAME, MONTHLY_ACTUAL)
tr <- subway %>%
filter(INDICATOR_NAME == "Total Ridership - Subways") %>%
spread(INDICATOR_NAME, MONTHLY_ACTUAL)
cir <- subway %>%
filter(INDICATOR_NAME == "Customer Injury Rate - Subways") %>%
spread(INDICATOR_NAME, MONTHLY_ACTUAL)
elev <- subway %>%
filter(INDICATOR_NAME == "Elevator Availability - Subways") %>%
spread(INDICATOR_NAME, MONTHLY_ACTUAL)
esc <- subway %>%
filter(INDICATOR_NAME == "Escalator Availability - Subways") %>%
spread(INDICATOR_NAME, MONTHLY_ACTUAL)
otp <- subway %>%
filter(INDICATOR_NAME == "On-Time Performance (Terminal)") %>%
spread(INDICATOR_NAME, MONTHLY_ACTUAL)
final.subway <- left_join(mdf, tr, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, cir, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, elev, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, esc, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
final.subway <- left_join(final.subway, otp, by = c("PERIOD_YEAR", "PERIOD_MONTH"))
# Focus on OTP
final.subway <- final.subway[-c(1:17),]
colnames(final.subway) <- c("YEAR", "MONTH", "FAILURE", "RIDERSHIP", "INJURY", "ELEV", "ESCA", "OTP")
plot(final.subway$YEAR, final.subway$RIDERSHIP)
plot(final.subway$YEAR, final.subway$OTP)
# Model
model <- lm(OTP ~ FAILURE + RIDERSHIP + INJURY + ELEV + ESCA,
data = final.subway)
summary(model)
##
## Call:
## lm(formula = OTP ~ FAILURE + RIDERSHIP + INJURY + ELEV + ESCA,
## data = final.subway)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.2324 -3.5214 -0.0817 2.9602 11.8279
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.735e+02 7.285e+01 -2.382 0.0192 *
## FAILURE 1.582e-04 1.890e-05 8.369 5.34e-13 ***
## RIDERSHIP -3.371e-07 7.612e-08 -4.428 2.56e-05 ***
## INJURY -1.728e+00 1.603e+00 -1.078 0.2837
## ELEV 3.470e+00 7.784e-01 4.457 2.29e-05 ***
## ESCA -5.904e-01 4.241e-01 -1.392 0.1671
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.122 on 94 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.7036, Adjusted R-squared: 0.6878
## F-statistic: 44.62 on 5 and 94 DF, p-value: < 2.2e-16
qqnorm(model$residuals)
qqline(model$residuals)