knitr::opts_chunk$set(echo = TRUE)
packages = c('dplyr','stringr')
for (p in packages){
  if(!require(p, character.only = T)){
    install.packages(p) 
  } 
  library(p,character.only = T) 
}
elements <- read.csv(file.path( "./origin_destination_bus_201909.csv"))
str(elements)
'data.frame':   6678132 obs. of  7 variables:
 $ YEAR_MONTH         : Factor w/ 1 level "2019-09": 1 1 1 1 1 1 1 1 1 1 ...
 $ DAY_TYPE           : Factor w/ 2 levels "WEEKDAY","WEEKENDS/HOLIDAY": 2 1 1 2 1 1 2 1 1 2 ...
 $ TIME_PER_HOUR      : int  17 17 8 8 9 22 22 12 17 7 ...
 $ PT_TYPE            : Factor w/ 1 level "BUS": 1 1 1 1 1 1 1 1 1 1 ...
 $ ORIGIN_PT_CODE     : int  99049 99049 91099 91099 76251 10499 10499 1211 93019 31051 ...
 $ DESTINATION_PT_CODE: int  72031 72031 92091 92091 75339 10191 10191 62021 66349 44009 ...
 $ TOTAL_TRIPS        : int  16 16 112 48 16 32 32 16 16 112 ...

Clean data

bus_stop_weekday_AM_PM <- elements %>%
  filter(`TIME_PER_HOUR` == 8 | `TIME_PER_HOUR` == 9 | `TIME_PER_HOUR` == 18 | `TIME_PER_HOUR` == 19) %>%
  filter(`DAY_TYPE` == "WEEKDAY") 
bus_stop_weekday_AM_PM[,5:6] <- bus_stop_weekday_AM_PM[,5:6]  %>% mutate_if(is.integer, as.character)
bus_stop_weekday_AM_PM$ORIGIN_PT_CODE <- ifelse(nchar(bus_stop_weekday_AM_PM$ORIGIN_PT_CODE) == 5, bus_stop_weekday_AM_PM$ORIGIN_PT_CODE,  paste("0", bus_stop_weekday_AM_PM$ORIGIN_PT_CODE,sep = ""))
bus_stop_weekday_AM_PM$DESTINATION_PT_CODE <- ifelse(nchar(bus_stop_weekday_AM_PM$DESTINATION_PT_CODE) == 5, bus_stop_weekday_AM_PM$DESTINATION_PT_CODE,  paste("0", bus_stop_weekday_AM_PM$DESTINATION_PT_CODE,sep = ""))
head(bus_stop_weekday_AM_PM,20)
punggol_trains <- read.csv(file.path( "./trains_punggol.csv"))
punggol_buses <- read.csv(file.path( "./bus_stops_punggol.csv"))
str(punggol_trains)
'data.frame':   16 obs. of  3 variables:
 $ OBJECTID: int  42 54 55 58 60 69 43 50 52 53 ...
 $ STN_NAME: Factor w/ 16 levels "CORAL EDGE LRT STATION",..: 7 4 1 3 11 5 12 8 6 14 ...
 $ STN_NO  : Factor w/ 16 levels "NE17","PE1","PE2",..: 7 6 4 8 5 3 10 9 14 16 ...
punggol_trains$STN_NO <- as.character(punggol_trains$STN_NO)
punggol_trains
#get all bus stop numbers in punggol
bus_stops<- punggol_buses$BUS_STOP_N
bus_stop_weekday_AM_PM <- bus_stop_weekday_AM_PM %>%
  filter(`TIME_PER_HOUR` == 8 | `TIME_PER_HOUR` == 9 | `TIME_PER_HOUR` == 18 | `TIME_PER_HOUR` == 19) %>%
  filter(`DAY_TYPE` == "WEEKDAY") 
bus_stop_weekday_AM_PM <- bus_stop_weekday_AM_PM[bus_stop_weekday_AM_PM$ORIGIN_PT_CODE %in% bus_stops ,]
str(bus_stop_weekday_AM_PM)
'data.frame':   8288 obs. of  7 variables:
 $ YEAR_MONTH         : Factor w/ 1 level "2019-09": 1 1 1 1 1 1 1 1 1 1 ...
 $ DAY_TYPE           : Factor w/ 2 levels "WEEKDAY","WEEKENDS/HOLIDAY": 1 1 1 1 1 1 1 1 1 1 ...
 $ TIME_PER_HOUR      : int  19 19 9 9 8 19 8 19 19 9 ...
 $ PT_TYPE            : Factor w/ 1 level "BUS": 1 1 1 1 1 1 1 1 1 1 ...
 $ ORIGIN_PT_CODE     : chr  "65411" "65281" "65229" "65199" ...
 $ DESTINATION_PT_CODE: chr  "65399" "63181" "65339" "84529" ...
 $ TOTAL_TRIPS        : int  144 16 112 1248 16 4928 32 32 144 48 ...
bus_stop_weekday_AM_PM

TRAIN DATA

train_data <- read.csv(file.path( "./origin_destination_train_201909.csv"))
train_weekday_AM_PM <- train_data %>%
  filter(`TIME_PER_HOUR` == 8 | `TIME_PER_HOUR` == 9 | `TIME_PER_HOUR` == 18 | `TIME_PER_HOUR` == 19) %>%
  filter(`DAY_TYPE` == "WEEKDAY") 
train_weekday_AM_PM[,5:6] <- train_weekday_AM_PM[,5:6]  %>% mutate_if(is.factor, as.character)
str(train_weekday_AM_PM)
'data.frame':   93967 obs. of  7 variables:
 $ YEAR_MONTH         : Factor w/ 1 level "2019-09": 1 1 1 1 1 1 1 1 1 1 ...
 $ DAY_TYPE           : Factor w/ 2 levels "WEEKDAY","WEEKENDS/HOLIDAY": 1 1 1 1 1 1 1 1 1 1 ...
 $ TIME_PER_HOUR      : int  8 8 18 18 8 8 19 19 19 19 ...
 $ PT_TYPE            : Factor w/ 1 level "TRAIN": 1 1 1 1 1 1 1 1 1 1 ...
 $ ORIGIN_PT_CODE     : chr  "CC10" "CC22" "EW20" "DT13" ...
 $ DESTINATION_PT_CODE: chr  "CC22" "CC10" "DT13" "EW20" ...
 $ TOTAL_TRIPS        : int  509 384 41 23 2 43 4 204 29 94 ...
train_stops <- punggol_trains$STN_NO
train_stops <- c(train_stops, 'NE17-PTC')
train_stops
 [1] "PE6"      "PE5"      "PE3"      "PE7"      "PE4"      "PE2"      "PW1"      "PTC"      "PW5"      "PW7"     
[11] "PW6"      "PW2"      "NE17"     "PW4"      "PW3"      "PE1"      "NE17-PTC"
train_weekday_AM_PM <- train_weekday_AM_PM[train_weekday_AM_PM$ORIGIN_PT_CODE %in% train_stops ,]
train_weekday_AM_PM
train_weekday_AM_PM$ORIGIN_PT_CODE[train_weekday_AM_PM$ORIGIN_PT_COD == "NE17-PTC"] <- "NE17"
train_weekday_AM_PM$DESTINATION_PT_CODE[train_weekday_AM_PM$DESTINATION_PT_CODE == "NE17-PTC"] <- "NE17"
#train_weekday_AM_PM$DESTINATION_PT_CODE[grepl('-', train_weekday_AM_PM$DESTINATION_PT_CODE)] <-
#   train_weekday_AM_PM$DESTINATION_PT_CODE
#train_weekday_AM_PM$ORIGIN_PT_CODE[grepl('-', train_weekday_AM_PM$ORIGIN_PT_CODE)] <- strsplit(train_weekday_AM_PM$ORIGIN_PT_CODE, "-")[[1]][1]
train_weekday_AM_PM
write.csv(train_weekday_AM_PM,'train_peak_hour_data_sept.csv')
write.csv(bus_stop_weekday_AM_PM,'bus_peak_hour_data_sept.csv')
LS0tCnRpdGxlOiAiUHVuZ2dvbCBQZWFrIEhvdXIgVHJhdmVsIFBhdHRlcm4gQW5hbHlzaXMiCmF1dGhvcjogIkplcnJ5IFRvaHZhbiIKZGF0ZTogIjExLzUvMjAxOSIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBudW1iZXJfc2VjdGlvbjogeWVzCiAgICB0aGVtZTogZmxhdGx5CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICBodG1sX2RvY3VtZW50OgogICAgY29kZV9mb2xkaW5nOiBoaWRlCiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgcGRmX2RvY3VtZW50OgogICAgdG9jOiB5ZXMKICB3b3JkX2RvY3VtZW50OgogICAgdG9jOiB5ZXMKLS0tCgoKYGBge3Igc2V0dXAsIGluY2x1ZGU9VFJVRSwgZXZhbD1UUlVFLG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgoKYGBge3J9CnBhY2thZ2VzID0gYygnZHBseXInLCdzdHJpbmdyJykKZm9yIChwIGluIHBhY2thZ2VzKXsKICBpZighcmVxdWlyZShwLCBjaGFyYWN0ZXIub25seSA9IFQpKXsKICAgIGluc3RhbGwucGFja2FnZXMocCkgCiAgfSAKICBsaWJyYXJ5KHAsY2hhcmFjdGVyLm9ubHkgPSBUKSAKfQpgYGAKCgpgYGB7cn0KZWxlbWVudHMgPC0gcmVhZC5jc3YoZmlsZS5wYXRoKCAiLi9vcmlnaW5fZGVzdGluYXRpb25fYnVzXzIwMTkwOS5jc3YiKSkKCmBgYAoKCmBgYHtyfQpzdHIoZWxlbWVudHMpCmBgYAoKQ2xlYW4gZGF0YQpgYGB7cn0KYnVzX3N0b3Bfd2Vla2RheV9BTV9QTSA8LSBlbGVtZW50cyAlPiUKICBmaWx0ZXIoYFRJTUVfUEVSX0hPVVJgID09IDggfCBgVElNRV9QRVJfSE9VUmAgPT0gOSB8IGBUSU1FX1BFUl9IT1VSYCA9PSAxOCB8IGBUSU1FX1BFUl9IT1VSYCA9PSAxOSkgJT4lCiAgZmlsdGVyKGBEQVlfVFlQRWAgPT0gIldFRUtEQVkiKSAKYnVzX3N0b3Bfd2Vla2RheV9BTV9QTVssNTo2XSA8LSBidXNfc3RvcF93ZWVrZGF5X0FNX1BNWyw1OjZdICAlPiUgbXV0YXRlX2lmKGlzLmludGVnZXIsIGFzLmNoYXJhY3RlcikKCmJ1c19zdG9wX3dlZWtkYXlfQU1fUE0kT1JJR0lOX1BUX0NPREUgPC0gaWZlbHNlKG5jaGFyKGJ1c19zdG9wX3dlZWtkYXlfQU1fUE0kT1JJR0lOX1BUX0NPREUpID09IDUsIGJ1c19zdG9wX3dlZWtkYXlfQU1fUE0kT1JJR0lOX1BUX0NPREUsICBwYXN0ZSgiMCIsIGJ1c19zdG9wX3dlZWtkYXlfQU1fUE0kT1JJR0lOX1BUX0NPREUsc2VwID0gIiIpKQoKYnVzX3N0b3Bfd2Vla2RheV9BTV9QTSRERVNUSU5BVElPTl9QVF9DT0RFIDwtIGlmZWxzZShuY2hhcihidXNfc3RvcF93ZWVrZGF5X0FNX1BNJERFU1RJTkFUSU9OX1BUX0NPREUpID09IDUsIGJ1c19zdG9wX3dlZWtkYXlfQU1fUE0kREVTVElOQVRJT05fUFRfQ09ERSwgIHBhc3RlKCIwIiwgYnVzX3N0b3Bfd2Vla2RheV9BTV9QTSRERVNUSU5BVElPTl9QVF9DT0RFLHNlcCA9ICIiKSkKYGBgCgoKCgpgYGB7cn0KaGVhZChidXNfc3RvcF93ZWVrZGF5X0FNX1BNLDIwKQpwdW5nZ29sX3RyYWlucyA8LSByZWFkLmNzdihmaWxlLnBhdGgoICIuL3RyYWluc19wdW5nZ29sLmNzdiIpKQpwdW5nZ29sX2J1c2VzIDwtIHJlYWQuY3N2KGZpbGUucGF0aCggIi4vYnVzX3N0b3BzX3B1bmdnb2wuY3N2IikpCnN0cihwdW5nZ29sX3RyYWlucykKcHVuZ2dvbF90cmFpbnMkU1ROX05PIDwtIGFzLmNoYXJhY3RlcihwdW5nZ29sX3RyYWlucyRTVE5fTk8pCgpwdW5nZ29sX3RyYWlucwoKI2dldCBhbGwgYnVzIHN0b3AgbnVtYmVycyBpbiBwdW5nZ29sCmBgYAoKCmBgYHtyfQpidXNfc3RvcHM8LSBwdW5nZ29sX2J1c2VzJEJVU19TVE9QX04KCmJ1c19zdG9wX3dlZWtkYXlfQU1fUE0gPC0gYnVzX3N0b3Bfd2Vla2RheV9BTV9QTSAlPiUKICBmaWx0ZXIoYFRJTUVfUEVSX0hPVVJgID09IDggfCBgVElNRV9QRVJfSE9VUmAgPT0gOSB8IGBUSU1FX1BFUl9IT1VSYCA9PSAxOCB8IGBUSU1FX1BFUl9IT1VSYCA9PSAxOSkgJT4lCiAgZmlsdGVyKGBEQVlfVFlQRWAgPT0gIldFRUtEQVkiKSAKCmJ1c19zdG9wX3dlZWtkYXlfQU1fUE0gPC0gYnVzX3N0b3Bfd2Vla2RheV9BTV9QTVtidXNfc3RvcF93ZWVrZGF5X0FNX1BNJE9SSUdJTl9QVF9DT0RFICVpbiUgYnVzX3N0b3BzICxdCnN0cihidXNfc3RvcF93ZWVrZGF5X0FNX1BNKQoKYnVzX3N0b3Bfd2Vla2RheV9BTV9QTQpgYGAKCgoKVFJBSU4gREFUQQpgYGB7cn0KdHJhaW5fZGF0YSA8LSByZWFkLmNzdihmaWxlLnBhdGgoICIuL29yaWdpbl9kZXN0aW5hdGlvbl90cmFpbl8yMDE5MDkuY3N2IikpCgpgYGAKYGBge3J9CnRyYWluX3dlZWtkYXlfQU1fUE0gPC0gdHJhaW5fZGF0YSAlPiUKICBmaWx0ZXIoYFRJTUVfUEVSX0hPVVJgID09IDggfCBgVElNRV9QRVJfSE9VUmAgPT0gOSB8IGBUSU1FX1BFUl9IT1VSYCA9PSAxOCB8IGBUSU1FX1BFUl9IT1VSYCA9PSAxOSkgJT4lCiAgZmlsdGVyKGBEQVlfVFlQRWAgPT0gIldFRUtEQVkiKSAKdHJhaW5fd2Vla2RheV9BTV9QTVssNTo2XSA8LSB0cmFpbl93ZWVrZGF5X0FNX1BNWyw1OjZdICAlPiUgbXV0YXRlX2lmKGlzLmZhY3RvciwgYXMuY2hhcmFjdGVyKQoKc3RyKHRyYWluX3dlZWtkYXlfQU1fUE0pCmBgYAoKYGBge3J9CnRyYWluX3N0b3BzIDwtIHB1bmdnb2xfdHJhaW5zJFNUTl9OTwp0cmFpbl9zdG9wcyA8LSBjKHRyYWluX3N0b3BzLCAnTkUxNy1QVEMnKQp0cmFpbl9zdG9wcwp0cmFpbl93ZWVrZGF5X0FNX1BNIDwtIHRyYWluX3dlZWtkYXlfQU1fUE1bdHJhaW5fd2Vla2RheV9BTV9QTSRPUklHSU5fUFRfQ09ERSAlaW4lIHRyYWluX3N0b3BzICxdCgp0cmFpbl93ZWVrZGF5X0FNX1BNCgp0cmFpbl93ZWVrZGF5X0FNX1BNJE9SSUdJTl9QVF9DT0RFW3RyYWluX3dlZWtkYXlfQU1fUE0kT1JJR0lOX1BUX0NPRCA9PSAiTkUxNy1QVEMiXSA8LSAiTkUxNyIKdHJhaW5fd2Vla2RheV9BTV9QTSRERVNUSU5BVElPTl9QVF9DT0RFW3RyYWluX3dlZWtkYXlfQU1fUE0kREVTVElOQVRJT05fUFRfQ09ERSA9PSAiTkUxNy1QVEMiXSA8LSAiTkUxNyIKCiN0cmFpbl93ZWVrZGF5X0FNX1BNJERFU1RJTkFUSU9OX1BUX0NPREVbZ3JlcGwoJy0nLCB0cmFpbl93ZWVrZGF5X0FNX1BNJERFU1RJTkFUSU9OX1BUX0NPREUpXSA8LQojICAgdHJhaW5fd2Vla2RheV9BTV9QTSRERVNUSU5BVElPTl9QVF9DT0RFCiN0cmFpbl93ZWVrZGF5X0FNX1BNJE9SSUdJTl9QVF9DT0RFW2dyZXBsKCctJywgdHJhaW5fd2Vla2RheV9BTV9QTSRPUklHSU5fUFRfQ09ERSldIDwtIHN0cnNwbGl0KHRyYWluX3dlZWtkYXlfQU1fUE0kT1JJR0lOX1BUX0NPREUsICItIilbWzFdXVsxXQoKCnRyYWluX3dlZWtkYXlfQU1fUE0KYGBgCgpgYGB7cn0Kd3JpdGUuY3N2KHRyYWluX3dlZWtkYXlfQU1fUE0sJ3RyYWluX3BlYWtfaG91cl9kYXRhX3NlcHQuY3N2JykKd3JpdGUuY3N2KGJ1c19zdG9wX3dlZWtkYXlfQU1fUE0sJ2J1c19wZWFrX2hvdXJfZGF0YV9zZXB0LmNzdicpCgpgYGAKCg==