Objectives:

Install and Load packages:

# Packages used by this analysis: spatial classes and I/O (sp, rgeos, sf,
# rgdal), spatial dependence (spdep), thematic mapping (tmap) and data
# wrangling (tidyverse).
packages <- c("sp", "rgeos", "sf", "rgdal", "spdep", "tmap", "tidyverse")

# Install whichever packages are missing, then attach every one of them.
for (p in packages) {
  # require() is used only for its logical result: FALSE means the package
  # could not be attached and therefore has to be installed first.
  if (!require(p, character.only = TRUE)) {
    install.packages(p)
  }
  # library() stops with an error if the package is still unavailable, so a
  # failed installation cannot go unnoticed.
  library(p, character.only = TRUE)
}
## Loading required package: sp
## Loading required package: rgeos
## rgeos version: 0.5-2, (SVN revision 621)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 
##  Linking to sp version: 1.4-1 
##  Polygon checking: TRUE
## Loading required package: sf
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
## Loading required package: rgdal
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/TYZ/Documents/R/win-library/3.6/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/TYZ/Documents/R/win-library/3.6/rgdal/proj
##  Linking to sp version: 1.4-1
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: tmap
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Importing passenger volume data into R:

# Read the passenger-volume CSV into a tibble; readr guesses the column types.
pv <- read_csv(
  file = "data/aspatial/passenger volume by busstop.csv"
)
## Parsed with column specification:
## cols(
##   YEAR_MONTH = col_character(),
##   DAY_TYPE = col_character(),
##   TIME_PER_HOUR = col_double(),
##   PT_TYPE = col_character(),
##   PT_CODE = col_character(),
##   TOTAL_TAP_IN_VOLUME = col_double(),
##   TOTAL_TAP_OUT_VOLUME = col_double()
## )

Importing population data into R:

# Read the resident-population CSV into a tibble; readr guesses the column
# types.
pop_raw <- read_csv(
  file = "data/aspatial/resident-population-by-subzone-age-group-and-sex.csv"
)
## Parsed with column specification:
## cols(
##   year = col_double(),
##   level_1 = col_character(),
##   level_2 = col_character(),
##   level_3 = col_character(),
##   level_4 = col_character(),
##   value = col_double()
## )

Viewing population data

# Auto-print the tibble to preview its dimensions, column types and first rows.
pop_raw
## # A tibble: 12,616 x 6
##     year level_1 level_2 level_3           level_4                value
##    <dbl> <chr>   <chr>   <chr>             <chr>                  <dbl>
##  1  2015 Total   Total   Ang Mo Kio- Total Ang Mo Kio Town Centre  5020
##  2  2015 Total   Total   Ang Mo Kio- Total Cheng San              29770
##  3  2015 Total   Total   Ang Mo Kio- Total Chong Boon             27900
##  4  2015 Total   Total   Ang Mo Kio- Total Kebun Bahru            23910
##  5  2015 Total   Total   Ang Mo Kio- Total Sembawang Hills         6890
##  6  2015 Total   Total   Ang Mo Kio- Total Shangri-La             18510
##  7  2015 Total   Total   Ang Mo Kio- Total Tagore                  8350
##  8  2015 Total   Total   Ang Mo Kio- Total Townsville             23770
##  9  2015 Total   Total   Ang Mo Kio- Total Yio Chu Kang              30
## 10  2015 Total   Total   Ang Mo Kio- Total Yio Chu Kang East       4080
## # ... with 12,606 more rows

Tidying population data

# Keep only the rows whose level_1 and level_2 are both "Total", reduce them
# to the subzone name (level_4) and its value, and upper-case the names.
pop <- pop_raw %>%
  filter(level_1 == "Total", level_2 == "Total") %>%
  select(level_4, value) %>%
  mutate(level_4 = toupper(level_4))

`pop` now contains the total population of each subzone.

Importing geospatial data:

# Read the subzone polygon layer from the geospatial data folder.
subzones <- st_read(
  dsn = "data/geospatial",
  layer = "MP14_SUBZONE_WEB_PL"
)
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `D:\SMU\IS415\Take-home ex1\Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## proj4string:    +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs

Importing bus stops geospatial data

# Read the bus-stop point layer from the geospatial data folder.
bus_stops <- st_read(
  dsn = "data/geospatial",
  layer = "BusStop"
)
## Reading layer `BusStop' from data source `D:\SMU\IS415\Take-home ex1\Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 5040 features and 3 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
## proj4string:    +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs

Tidying up passenger volume data

Simplifying the data by dropping irrelevant columns:

# Keep only the stop code and the two volume columns.
pv_1 <- select(pv, PT_CODE, TOTAL_TAP_IN_VOLUME, TOTAL_TAP_OUT_VOLUME)

Averaging the tap-in and tap-out volumes over all rows that share the same bus stop code (PT_CODE):

# Mean tap-in volume per stop code.
pv_tap_in <- summarise(
  group_by(pv_1, PT_CODE),
  TOTAL_TAP_IN = mean(TOTAL_TAP_IN_VOLUME)
)

# Mean tap-out volume per stop code.
pv_tap_out <- summarise(
  group_by(pv_1, PT_CODE),
  TOTAL_TAP_OUT = mean(TOTAL_TAP_OUT_VOLUME)
)

Combining the passenger volume tables with the bus stops table by bus stop number (PT_CODE):

# Attach the bus-stop columns to each stop's mean tap-in volume; stop codes
# with no match in bus_stops keep their row and get NA in the added columns.
pv_tap_in_sz <- left_join(
  pv_tap_in,
  bus_stops,
  by = c("PT_CODE" = "BUS_STOP_N")
)
## Warning: Column `PT_CODE`/`BUS_STOP_N` joining character vector and factor,
## coercing into character vector
# Attach the bus-stop columns to each stop's mean tap-out volume; stop codes
# with no match in bus_stops keep their row and get NA in the added columns.
pv_tap_out_sz <- left_join(
  pv_tap_out,
  bus_stops,
  by = c("PT_CODE" = "BUS_STOP_N")
)
## Warning: Column `PT_CODE`/`BUS_STOP_N` joining character vector and factor,
## coercing into character vector

Remove invalid rows (containing NA) from data table

# Discard every row that has a missing value in any column.
pv_tap_in_sz <- drop_na(pv_tap_in_sz)
pv_tap_out_sz <- drop_na(pv_tap_out_sz)

Combine population data with subzone data

# Attach the subzone attributes and geometry to the population table by
# matching the upper-cased subzone names in `level_4` to `SUBZONE_N`.
# dplyr dispatches the join on the left-hand table, so joining a tibble to an
# sf object returns a plain tibble; st_as_sf() restores the sf class that
# poly2nb() below needs in order to treat the rows as polygons.
# NOTE(review): population rows without a matching subzone have no usable
# geometry -- confirm whether they must be dropped before building neighbours.
pop_sub <- pop %>%
  left_join(subzones, by = c("level_4" = "SUBZONE_N")) %>%
  st_as_sf()
## Warning: Column `level_4`/`SUBZONE_N` joining character vector and factor,
## coercing into character vector

Joining tap in/out data with subzone data

# Build the neighbour list of the joined subzone polygons with queen = TRUE.
wm <- poly2nb(pl = pop_sub, queen = TRUE)