# Echo the source of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Packages this analysis depends on.
packages <- c("dplyr", "stringr")

# Install any package that is missing, then attach it.
# requireNamespace() only checks that the package can be loaded (it does not
# attach it), so the attach happens exactly once, in library(), which stops
# with an error if the package is still unavailable after installation.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Read the bus origin-destination trip table and show its column layout.
elements <- utils::read.csv(file = "./origin_destination_bus_201909.csv")
utils::str(elements)
'data.frame': 6678132 obs. of 7 variables:
$ YEAR_MONTH : Factor w/ 1 level "2019-09": 1 1 1 1 1 1 1 1 1 1 ...
$ DAY_TYPE : Factor w/ 2 levels "WEEKDAY","WEEKENDS/HOLIDAY": 2 1 1 2 1 1 2 1 1 2 ...
$ TIME_PER_HOUR : int 17 17 8 8 9 22 22 12 17 7 ...
$ PT_TYPE : Factor w/ 1 level "BUS": 1 1 1 1 1 1 1 1 1 1 ...
$ ORIGIN_PT_CODE : int 99049 99049 91099 91099 76251 10499 10499 1211 93019 31051 ...
$ DESTINATION_PT_CODE: int 72031 72031 92091 92091 75339 10191 10191 62021 66349 44009 ...
$ TOTAL_TRIPS : int 16 16 112 48 16 32 32 16 16 112 ...
Clean data
# Keep only weekday trips recorded in hours 8, 9, 18 and 19.
bus_stop_weekday_AM_PM <- elements %>%
  filter(
    TIME_PER_HOUR %in% c(8, 9, 18, 19),
    DAY_TYPE == "WEEKDAY"
  )

# Store the origin/destination stop codes as 5-character strings.
# read.csv parses the codes as integers, which drops leading zeros; str_pad()
# restores them for any code shorter than 5 characters and leaves codes that
# are already 5 characters (or longer) untouched. Columns are addressed by
# name rather than by position so a change in column order cannot silently
# pad the wrong fields.
bus_stop_weekday_AM_PM <- bus_stop_weekday_AM_PM %>%
  mutate(
    ORIGIN_PT_CODE = str_pad(
      as.character(ORIGIN_PT_CODE),
      width = 5, side = "left", pad = "0"
    ),
    DESTINATION_PT_CODE = str_pad(
      as.character(DESTINATION_PT_CODE),
      width = 5, side = "left", pad = "0"
    )
  )

# Preview the first 20 cleaned rows.
head(bus_stop_weekday_AM_PM, 20)
# Read the two Punggol reference tables (bus stops and train stations), then
# show the column layout of the train station table.
punggol_buses <- utils::read.csv(file = "./bus_stops_punggol.csv")
punggol_trains <- utils::read.csv(file = "./trains_punggol.csv")
utils::str(punggol_trains)
'data.frame': 16 obs. of 3 variables:
$ OBJECTID: int 42 54 55 58 60 69 43 50 52 53 ...
$ STN_NAME: Factor w/ 16 levels "CORAL EDGE LRT STATION",..: 7 4 1 3 11 5 12 8 6 14 ...
$ STN_NO : Factor w/ 16 levels "NE17","PE1","PE2",..: 7 6 4 8 5 3 10 9 14 16 ...
# Hold the station numbers as plain character strings (not factor codes) so
# they can be compared directly with the character trip codes later on.
punggol_trains[["STN_NO"]] <- as.character(punggol_trains[["STN_NO"]])
punggol_trains
# Get all bus stop numbers in Punggol.
# NOTE(review): the %in% test below compares these values with the zero-padded
# character codes in ORIGIN_PT_CODE; confirm how read.csv typed BUS_STOP_N if
# any Punggol stop code can start with "0".
bus_stops <- punggol_buses$BUS_STOP_N

# bus_stop_weekday_AM_PM is already limited to weekday trips in hours 8, 9, 18
# and 19 where it is built, so no second pass over those conditions is needed.
# Keep only the trips whose origin is one of the Punggol bus stops.
bus_stop_weekday_AM_PM <- bus_stop_weekday_AM_PM[
  bus_stop_weekday_AM_PM$ORIGIN_PT_CODE %in% bus_stops,
]
str(bus_stop_weekday_AM_PM)
'data.frame': 8288 obs. of 7 variables:
$ YEAR_MONTH : Factor w/ 1 level "2019-09": 1 1 1 1 1 1 1 1 1 1 ...
$ DAY_TYPE : Factor w/ 2 levels "WEEKDAY","WEEKENDS/HOLIDAY": 1 1 1 1 1 1 1 1 1 1 ...
$ TIME_PER_HOUR : int 19 19 9 9 8 19 8 19 19 9 ...
$ PT_TYPE : Factor w/ 1 level "BUS": 1 1 1 1 1 1 1 1 1 1 ...
$ ORIGIN_PT_CODE : chr "65411" "65281" "65229" "65199" ...
$ DESTINATION_PT_CODE: chr "65399" "63181" "65339" "84529" ...
$ TOTAL_TRIPS : int 144 16 112 1248 16 4928 32 32 144 48 ...
# Print the filtered bus trip table (auto-printed by evaluating its name).
bus_stop_weekday_AM_PM
TRAIN DATA
# Read the train origin-destination trip table.
train_data <- read.csv("./origin_destination_train_201909.csv")

# Keep only weekday trips recorded in hours 8, 9, 18 and 19.
train_weekday_AM_PM <- filter(
  train_data,
  TIME_PER_HOUR %in% c(8, 9, 18, 19),
  DAY_TYPE == "WEEKDAY"
)

# Columns 5 and 6 hold the origin and destination codes; turn them into
# character vectors when read.csv delivered them as factors.
train_weekday_AM_PM[5:6] <- lapply(
  train_weekday_AM_PM[5:6],
  function(code_col) {
    if (is.factor(code_col)) as.character(code_col) else code_col
  }
)
str(train_weekday_AM_PM)
'data.frame': 93967 obs. of 7 variables:
$ YEAR_MONTH : Factor w/ 1 level "2019-09": 1 1 1 1 1 1 1 1 1 1 ...
$ DAY_TYPE : Factor w/ 2 levels "WEEKDAY","WEEKENDS/HOLIDAY": 1 1 1 1 1 1 1 1 1 1 ...
$ TIME_PER_HOUR : int 8 8 18 18 8 8 19 19 19 19 ...
$ PT_TYPE : Factor w/ 1 level "TRAIN": 1 1 1 1 1 1 1 1 1 1 ...
$ ORIGIN_PT_CODE : chr "CC10" "CC22" "EW20" "DT13" ...
$ DESTINATION_PT_CODE: chr "CC22" "CC10" "DT13" "EW20" ...
$ TOTAL_TRIPS : int 509 384 41 23 2 43 4 204 29 94 ...
# Station codes used to select trips: every station number in the Punggol
# train table, plus the combined code "NE17-PTC" that appears in the trip data.
train_stops <- append(punggol_trains$STN_NO, "NE17-PTC")
train_stops
[1] "PE6" "PE5" "PE3" "PE7" "PE4" "PE2" "PW1" "PTC" "PW5" "PW7"
[11] "PW6" "PW2" "NE17" "PW4" "PW3" "PE1" "NE17-PTC"
# Keep only the train trips whose origin is one of the selected station codes.
train_weekday_AM_PM <- train_weekday_AM_PM[
  train_weekday_AM_PM$ORIGIN_PT_CODE %in% train_stops,
]
train_weekday_AM_PM

# Collapse the combined code "NE17-PTC" to "NE17" on both ends of a trip.
# The column name is spelled out in full on both sides of the assignment: an
# abbreviated name only resolves through `$` partial matching on a plain
# data.frame and yields NULL on a tibble.
train_weekday_AM_PM$ORIGIN_PT_CODE[
  train_weekday_AM_PM$ORIGIN_PT_CODE == "NE17-PTC"
] <- "NE17"
train_weekday_AM_PM$DESTINATION_PT_CODE[
  train_weekday_AM_PM$DESTINATION_PT_CODE == "NE17-PTC"
] <- "NE17"

# Only the "NE17-PTC" code is rewritten. Other hyphenated codes, if any, are
# left as they are (a general split on "-" was tried before and not applied).
train_weekday_AM_PM
# Export both filtered trip tables as CSV files in the working directory.
write.csv(bus_stop_weekday_AM_PM, file = "bus_peak_hour_data_sept.csv")
write.csv(train_weekday_AM_PM, file = "train_peak_hour_data_sept.csv")