Following the advent of the Open Data Initiative as well as The Smart Nation vision, Singapore has had a boon of data available for generating new insights. However, little work has been done on the data provided by various government agencies. Therefore, the objectives for this exercise is to demonstrate the potential contribution of geospatial analytics in R to integrate, analyse and communicate the analysis results. Primarily, the stated objectives are as follows:
Calibrating a simple linear regression to reveal the relation between public bus commuters’ flows (i.e. tap-in or tap-out) data and residential population at the planning sub-zone level.
Performing spatial autocorrelation analysis on the residual of the regression model to test if the model conforms to the randomization assumption.
Performing localized geospatial statistics analysis by using commuters’ tap-in and tap-out data to identify geographical clustering.
We will need to ensure that rgdal, spdep, sf, tmap and tidyverse packages of R are currently installed and loaded into your R for this exercise.
packages = c('rgdal', 'sf', 'spdep', 'tmap', 'tidyverse')
for (p in packages){
if (!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
One dataset by the name “passenger volume by busstop” has been provided. For the exercise we will be looking at the commuters’ flows (tap-in / tap-out) data for the month of January 2020.
For this exercise we will only be using data from 2019.
rDwelling <- read_csv ("data/aspatial/respopagesextod2011to2019.csv") %>%
filter(Time == 2019)
## Parsed with column specification:
## cols(
## PA = col_character(),
## SZ = col_character(),
## AG = col_character(),
## Sex = col_character(),
## TOD = col_character(),
## Pop = col_double(),
## Time = col_double()
## )
According to transitlink, children below 0.9m and the age of 7 can travel with an paying adult without requiring a concession card. Judging by the height charts of boys and girls from ages 0 - 7, they will not likely require a concession card. Therefore, we will be omitting AG “0_to_4”.
Sources: + https://www.transitlink.com.sg/TIdetail.aspx?ty=art&Id=79 + https://www.nhgp.com.sg/uploadedFiles/Our_Services/General_Medical_Services/HEIGHT%20CHART%20FOR%20BOYS.pdf + https://www.nhgp.com.sg/uploadedFiles/Our_Services/General_Medical_Services/HEIGHT%20CHART%20FOR%20GIRLS.pdf
rDwelling_FilterAG <- rDwelling %>%
filter(AG != "0_to_4")
rDwelling_FilterAG[rowSums(is.na(rDwelling_FilterAG))!=0,]
## # A tibble: 0 x 7
## # ... with 7 variables: PA <chr>, SZ <chr>, AG <chr>, Sex <chr>, TOD <chr>,
## # Pop <dbl>, Time <dbl>
rDwelling_FilterAG_Sum <- rDwelling_FilterAG %>%
group_by(SZ = SZ) %>%
summarise(Pop = sum(Pop)) %>%
mutate_at(.vars = vars(SZ), .funs = funs(toupper))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
rDwelling_FilterAG_Sum[rowSums(is.na(rDwelling_FilterAG_Sum))!=0,]
## # A tibble: 0 x 2
## # ... with 2 variables: SZ <chr>, Pop <dbl>
PV_by_BS <- read_csv("data/aspatial/passenger volume by busstop.csv")
## Parsed with column specification:
## cols(
## YEAR_MONTH = col_character(),
## DAY_TYPE = col_character(),
## TIME_PER_HOUR = col_double(),
## PT_TYPE = col_character(),
## PT_CODE = col_character(),
## TOTAL_TAP_IN_VOLUME = col_double(),
## TOTAL_TAP_OUT_VOLUME = col_double()
## )
PV_by_BS[rowSums(is.na(PV_by_BS))!=0,]
## # A tibble: 0 x 7
## # ... with 7 variables: YEAR_MONTH <chr>, DAY_TYPE <chr>, TIME_PER_HOUR <dbl>,
## # PT_TYPE <chr>, PT_CODE <chr>, TOTAL_TAP_IN_VOLUME <dbl>,
## # TOTAL_TAP_OUT_VOLUME <dbl>
PV_by_BS_Sum <- PV_by_BS %>%
group_by(PT_CODE = PT_CODE) %>%
summarise_at(vars(contains("TAP")),sum)
PV_by_BS_Sum[rowSums(is.na(PV_by_BS_Sum))!=0,]
## # A tibble: 0 x 3
## # ... with 3 variables: PT_CODE <chr>, TOTAL_TAP_IN_VOLUME <dbl>,
## # TOTAL_TAP_OUT_VOLUME <dbl>
Source: https://www.mytransport.sg/content/dam/datamall/datasets/Geospatial/BusStopLocation.zip
BS_Loc <- st_read(dsn = 'data/geospatial', layer = "BusStop")
## Reading layer `BusStop' from data source `C:\Users\jiiireh\Desktop\IS415_Take-home_Ex01\Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 5040 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
## projected CRS: SVY21
BS_Loc <- st_transform(BS_Loc, 3414)
st_crs(BS_Loc)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
BS_Loc[rowSums(is.na(BS_Loc))!=0,]
## Simple feature collection with 121 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 10792.71 ymin: 28191.17 xmax: 43485.8 ymax: 47931.22
## projected CRS: SVY21 / Singapore TM
## First 10 features:
## BUS_STOP_N BUS_ROOF_N LOC_DESC geometry
## 1 78221 B06 <NA> POINT (42227.96 39563.16)
## 36 30059 B15 <NA> POINT (16128.92 39596.94)
## 63 70291 B03 <NA> POINT (33981.28 35537.19)
## 105 96081 B05 <NA> POINT (41603.76 35413.11)
## 194 31171 B30 <NA> POINT (13634.64 39378.37)
## 221 66511 B02 <NA> POINT (32266 39747.92)
## 351 67481 B04 <NA> POINT (32945.05 41494.7)
## 352 83239 B06 <NA> POINT (37922.7 33240.12)
## 427 64521 B01A <NA> POINT (34249.3 39110.15)
## 536 31169 B31 <NA> POINT (13160.11 39376.74)
Since there are NA values in LOC_DESC, they will have to be replaced.
NONE = "NONE"
levels(BS_Loc$LOC_DESC) <- c(levels(BS_Loc$LOC_DESC), NONE)
BS_Loc <- BS_Loc %>%
replace_na(list(LOC_DESC = "NONE"))
BS_Loc[rowSums(is.na(BS_Loc))!=0,]
## Simple feature collection with 2 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 21294.95 ymin: 37068.44 xmax: 32659.61 ymax: 38543.97
## projected CRS: SVY21 / Singapore TM
## BUS_STOP_N BUS_ROOF_N LOC_DESC geometry
## 2897 43961 <NA> AFT DAIRY FARM HTS POINT (21294.95 38543.97)
## 2968 62141 <NA> OPP BLK 1 POINT (32659.61 37068.44)
BUS_ROOF_N also has NA values to be replaced.
levels(BS_Loc$BUS_ROOF_N) <- c(levels(BS_Loc$BUS_ROOF_N), NONE)
BS_Loc <- BS_Loc %>%
replace_na(list(BUS_ROOF_N = "NONE"))
BS_Loc[rowSums(is.na(BS_Loc))!=0,]
## Simple feature collection with 0 features and 3 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] BUS_STOP_N BUS_ROOF_N LOC_DESC geometry
## <0 rows> (or 0-length row.names)
isValid <- st_is_valid(BS_Loc)
unique(isValid)
## [1] TRUE
BS_Loc_PV <- left_join(BS_Loc, PV_by_BS_Sum, by = c("BUS_STOP_N" = "PT_CODE"))
## Warning: Column `BUS_STOP_N`/`PT_CODE` joining factor and character vector,
## coercing into character vector
BS_Loc_PV[rowSums(is.na(BS_Loc_PV))!=0,]
## Simple feature collection with 78 features and 5 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 7702.752 ymin: 27733.06 xmax: 41244.44 ymax: 49261.6
## projected CRS: SVY21 / Singapore TM
## First 10 features:
## BUS_STOP_N BUS_ROOF_N LOC_DESC TOTAL_TAP_IN_VOLUME
## 178 91121 NIL HSE 189 NA
## 256 44401 NIL OPP BLK 183 NA
## 344 31121 B40 NIRVANA MEMORIAL GDN NA
## 461 23381 B01A PLANT ENGRG NA
## 567 65601 NIL SUMANG STN EXIT B NA
## 574 65609 NIL SUMANG STN EXIT A NA
## 592 02031 B01 OPP THE ADELPHI NA
## 640 91151 NIL HAWAII TOWER NA
## 644 23411 B07 AFT JURONG MARINE BASE NA
## 692 18209 B01 BEF ALL ST CHAPEL NA
## TOTAL_TAP_OUT_VOLUME geometry
## 178 NA POINT (35030.24 31256.69)
## 256 NA POINT (20144.74 40279.55)
## 344 NA POINT (11838.8 39157.43)
## 461 NA POINT (12552.18 32234.11)
## 567 NA POINT (35281.38 43367.08)
## 574 NA POINT (35238.99 43359.3)
## 592 NA POINT (29991.79 30453.16)
## 640 NA POINT (34364.42 31052.96)
## 644 NA POINT (11634.46 31782.65)
## 692 NA POINT (23789.09 30790.59)
MPSZ_2014 <- st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL")
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `C:\Users\jiiireh\Desktop\IS415_Take-home_Ex01\Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## projected CRS: SVY21
MPSZ_2014 <- st_transform(MPSZ_2014, 3414)
st_crs(MPSZ_2014)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
MPSZ_2014[rowSums(is.na(MPSZ_2014))!=0,]
## Simple feature collection with 0 features and 15 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND PLN_AREA_N
## [7] PLN_AREA_C REGION_N REGION_C INC_CRC FMEL_UPD_D X_ADDR
## [13] Y_ADDR SHAPE_Leng SHAPE_Area geometry
## <0 rows> (or 0-length row.names)
isValid <- st_is_valid(MPSZ_2014)
unique(isValid)
## [1] TRUE FALSE
Some polygons are invalid. They will require fixing,
MPSZ_2014 <- st_make_valid(MPSZ_2014)
isValid <- st_is_valid(MPSZ_2014)
unique(isValid)
## [1] TRUE
MPSZ_2014_rDwelling <- left_join(MPSZ_2014, rDwelling_FilterAG_Sum, by = c("SUBZONE_N" = "SZ"))
## Warning: Column `SUBZONE_N`/`SZ` joining factor and character vector, coercing
## into character vector
MPSZ_2014_rDwelling[rowSums(is.na(MPSZ_2014_rDwelling))!=0,]
## Simple feature collection with 0 features and 16 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND PLN_AREA_N
## [7] PLN_AREA_C REGION_N REGION_C INC_CRC FMEL_UPD_D X_ADDR
## [13] Y_ADDR SHAPE_Leng SHAPE_Area Pop geometry
## <0 rows> (or 0-length row.names)
isValid <- st_is_valid(MPSZ_2014_rDwelling)
unique(isValid)
## [1] TRUE
tm_shape(MPSZ_2014_rDwelling)+
tm_fill(col = "Pop",
n = 5,
style="jenks",
palette = "Purples",
title = "Residential Population Count") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Subzone Residential Dwelling Data from Department of Statistics Singapore & Master Plan Subzone Boundary from Urban Redevelopment Environment (URA)", position = c("left", "bottom"))+
tm_layout(title = "Residential Population Distribution 2019",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.06, 0.10, 0.10, 0.08))
MPSZ_BS_Loc <- st_join(MPSZ_2014_rDwelling, BS_Loc_PV)
MPSZ_BS_Loc <- st_transform(MPSZ_BS_Loc, 3414)
st_crs(MPSZ_BS_Loc)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
MPSZ_BS_Loc[rowSums(is.na(MPSZ_BS_Loc))!=0,]
## Simple feature collection with 94 features and 21 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 5539.013 ymin: 15748.72 xmax: 56396.44 ymax: 49122.83
## projected CRS: SVY21 / Singapore TM
## First 10 features:
## OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND
## 13 13 1 MARINA EAST MESZ01 Y
## 16 16 1 JURONG ISLAND AND BUKOM WISZ01 N
## 17 17 3 SUDONG WISZ03 N
## 18 18 2 SEMAKAU WISZ02 N
## 19 19 2 SOUTHERN GROUP SISZ02 N
## 24.11 24 1 MARITIME SQUARE BMSZ01 N
## 24.13 24 1 MARITIME SQUARE BMSZ01 N
## 24.14 24 1 MARITIME SQUARE BMSZ01 N
## 24.17 24 1 MARITIME SQUARE BMSZ01 N
## 31.8 31 11 CENTRAL SUBZONE DTSZ11 Y
## PLN_AREA_N PLN_AREA_C REGION_N REGION_C INC_CRC
## 13 MARINA EAST ME CENTRAL REGION CR 782A2FAF53029A34
## 16 WESTERN ISLANDS WI WEST REGION WR 699F7210FBF1AFA8
## 17 WESTERN ISLANDS WI WEST REGION WR F718C723E08FBD51
## 18 WESTERN ISLANDS WI WEST REGION WR E69207D4F76DEEA3
## 19 SOUTHERN ISLANDS SI CENTRAL REGION CR 5809FC547293EA2D
## 24.11 BUKIT MERAH BM CENTRAL REGION CR C1AC31ABF9978DDB
## 24.13 BUKIT MERAH BM CENTRAL REGION CR C1AC31ABF9978DDB
## 24.14 BUKIT MERAH BM CENTRAL REGION CR C1AC31ABF9978DDB
## 24.17 BUKIT MERAH BM CENTRAL REGION CR C1AC31ABF9978DDB
## 31.8 DOWNTOWN CORE DT CENTRAL REGION CR 8EFE9EC1AEF2DA0A
## FMEL_UPD_D X_ADDR Y_ADDR SHAPE_Leng SHAPE_Area Pop BUS_STOP_N
## 13 2014-12-05 32344.05 30103.25 6470.950 1844061 0 <NA>
## 16 2014-12-05 13012.88 27225.87 68083.936 36707721 0 <NA>
## 17 2014-12-05 15931.76 19579.07 24759.066 4207271 0 <NA>
## 18 2014-12-05 21206.33 20465.81 18703.681 4963787 0 <NA>
## 19 2014-12-05 29815.09 23412.59 25626.977 2206319 0 <NA>
## 24.11 2014-12-05 25805.79 27911.42 13737.116 2701634 3210 14401
## 24.13 2014-12-05 25805.79 27911.42 13737.116 2701634 3210 14419
## 24.14 2014-12-05 25805.79 27911.42 13737.116 2701634 3210 14429
## 24.17 2014-12-05 25805.79 27911.42 13737.116 2701634 3210 14409
## 31.8 2014-12-05 30125.84 28683.04 5002.016 1070723 740 03211
## BUS_ROOF_N LOC_DESC TOTAL_TAP_IN_VOLUME
## 13 <NA> <NA> NA
## 16 <NA> <NA> NA
## 17 <NA> <NA> NA
## 18 <NA> <NA> NA
## 19 <NA> <NA> NA
## 24.11 NIL OPP THE FURNITURE WAREHSE NA
## 24.13 NA CP A NA
## 24.14 B04 CP B NA
## 24.17 NA OPP BLK 3B NA
## 31.8 B01 BEF SHENTON WAY NA
## TOTAL_TAP_OUT_VOLUME geometry
## 13 NA MULTIPOLYGON (((33214.62 29...
## 16 NA MULTIPOLYGON (((14557.7 304...
## 17 NA MULTIPOLYGON (((15772.59 21...
## 18 NA MULTIPOLYGON (((19843.41 21...
## 19 NA MULTIPOLYGON (((27865.53 22...
## 24.11 NA POLYGON ((26514.58 28371.17...
## 24.13 NA POLYGON ((26514.58 28371.17...
## 24.14 NA POLYGON ((26514.58 28371.17...
## 24.17 NA POLYGON ((26514.58 28371.17...
## 31.8 NA MULTIPOLYGON (((30436.73 29...
There are NA values in BUS_STOP_N, BUS_ROOF_N, LOC_DESC, TOTAL_TAP_IN_VOLUME, TOTAL_TAP_OUT_VOLUME
levels(MPSZ_BS_Loc$BUS_STOP_N) <- c(levels(MPSZ_BS_Loc$BUS_STOP_N), NONE)
MPSZ_BS_Loc <- MPSZ_BS_Loc %>%
replace_na(list(BUS_STOP_N = "NONE"))
levels(MPSZ_BS_Loc$BUS_ROOF_N) <- c(levels(MPSZ_BS_Loc$BUS_ROOF_N), NONE)
MPSZ_BS_Loc <- MPSZ_BS_Loc %>%
replace_na(list(BUS_ROOF_N = "NONE"))
levels(MPSZ_BS_Loc$LOC_DESC) <- c(levels(MPSZ_BS_Loc$LOC_DESC), NONE)
MPSZ_BS_Loc <- MPSZ_BS_Loc %>%
replace_na(list(LOC_DESC = "NONE"))
MPSZ_BS_Loc$TOTAL_TAP_IN_VOLUME[is.na(MPSZ_BS_Loc$TOTAL_TAP_IN_VOLUME)] <- 0
MPSZ_BS_Loc$TOTAL_TAP_OUT_VOLUME[is.na(MPSZ_BS_Loc$TOTAL_TAP_OUT_VOLUME)] <- 0
MPSZ_BS_Loc[rowSums(is.na(MPSZ_BS_Loc))!=0,]
## Simple feature collection with 0 features and 21 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID SUBZONE_NO SUBZONE_N
## [4] SUBZONE_C CA_IND PLN_AREA_N
## [7] PLN_AREA_C REGION_N REGION_C
## [10] INC_CRC FMEL_UPD_D X_ADDR
## [13] Y_ADDR SHAPE_Leng SHAPE_Area
## [16] Pop BUS_STOP_N BUS_ROOF_N
## [19] LOC_DESC TOTAL_TAP_IN_VOLUME TOTAL_TAP_OUT_VOLUME
## [22] geometry
## <0 rows> (or 0-length row.names)
isValid <- st_is_valid(MPSZ_BS_Loc)
unique(isValid)
## [1] TRUE
For this exercise, we will be considering subzones who have all three residential populations and Tap In / Out’s values total up to 0. Likely a subzone that has such values is not accessable by most and hence, leaving it in may affect our auto-correlation analysis later on. We will retain any rows that have any values in the residential population, tap in / out data as a place such as Changi may have a lot of tap in / out volume but 0 residential population and hence is still applicable to our analysis.
MPSZ_BS_Loc_FilterSZ <- MPSZ_BS_Loc %>%
filter(Pop != 0 | TOTAL_TAP_IN_VOLUME != 0 | TOTAL_TAP_OUT_VOLUME != 0) %>%
group_by(SUBZONE_N = SUBZONE_N) %>%
mutate_at(vars(contains("TAP")),sum)
The code chunk above sums Tap In / Out volume by Subzone level, however at this point the dataframe has many duplicates as a result, we will proceed to remove the duplicates and the columns unnecessary for our analysis (Only requiring SUBZONE_N, Pop, Tap In / Out volume & Geometry)
MPSZ_BS_Loc_FilterSZ <- MPSZ_BS_Loc_FilterSZ[!duplicated(MPSZ_BS_Loc_FilterSZ[,c("SUBZONE_N")]),]
MPSZ_BS_Loc_FilterSZ <- select(MPSZ_BS_Loc_FilterSZ, -c(1,2,4:15,17:19))
MPSZ_BS_Loc_FilterSZ[rowSums(is.na(MPSZ_BS_Loc_FilterSZ))!=0,]
## Simple feature collection with 0 features and 4 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## # A tibble: 0 x 5
## # Groups: SUBZONE_N [0]
## # ... with 5 variables: SUBZONE_N <chr>, Pop <dbl>, TOTAL_TAP_IN_VOLUME <dbl>,
## # TOTAL_TAP_OUT_VOLUME <dbl>, geometry <GEOMETRY [m]>
isValid <- st_is_valid(MPSZ_BS_Loc_FilterSZ)
unique(isValid)
## [1] TRUE
tm_shape(MPSZ_BS_Loc_FilterSZ)+
tm_fill(col = "Pop",
n = 5,
style="jenks",
palette = "Pastel1",
title = "Residential Population Count") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Subzone Residential Dwelling Data from Department of Statistics Singapore & Master Plan Subzone Boundary from Urban Redevelopment Environment (URA)", position = c("left", "bottom"))+
tm_layout(title = "Subzone Filtered Residential Population Distribution 2019",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.06, 0.10, 0.10, 0.08))
Comparing this subzone filtered map to the one done prior, we can see that most of the surrounding islands in Singapore have been removed as part of the filtering process of removing areas with zero tap volume and residential population.
For all the linear regression models, our dependent variable y will be Tap In / Out volume while our independent x variable will be residential population.
TapInSLR <- lm(TOTAL_TAP_IN_VOLUME ~ Pop , data = MPSZ_BS_Loc_FilterSZ)
TapInSLR
##
## Call:
## lm(formula = TOTAL_TAP_IN_VOLUME ~ Pop, data = MPSZ_BS_Loc_FilterSZ)
##
## Coefficients:
## (Intercept) Pop
## 120673.58 20.38
The estimated regression line equation is: TOTAL_TAP_IN_VOLUME = (20.38*POPULATION) + 120673.58
summary(TapInSLR)
##
## Call:
## lm(formula = TOTAL_TAP_IN_VOLUME ~ Pop, data = MPSZ_BS_Loc_FilterSZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -799481 -127198 -67424 31303 1954431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.207e+05 2.075e+04 5.816 1.52e-08 ***
## Pop 2.038e+01 9.715e-01 20.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 294300 on 305 degrees of freedom
## Multiple R-squared: 0.5906, Adjusted R-squared: 0.5893
## F-statistic: 440 on 1 and 305 DF, p-value: < 2.2e-16
Our p-value for population at 2e-16 tells us that residential population is a significant variable, allowing us to reject the null hypothesis even at an alpha value of 0.001.
Based on the regression equation and our summary table, we can infer a positive correlation between tap in volume and residential population.
FilterSZ_TapInVol <- tm_shape(MPSZ_BS_Loc_FilterSZ)+
tm_fill(col = "TOTAL_TAP_IN_VOLUME",
n = 5,
style="jenks",
palette = "Oranges",
title = "Subzone Total Tap In Volume") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Subzone Filtered Total Tap In Volume",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
FilterSZ_TapInVol
TapInSLR_Model <- ggplot(data = TapInSLR, aes(x = Pop, y = TOTAL_TAP_IN_VOLUME)) +
geom_point(color = "Orange", size=2)+
geom_segment(aes(xend = Pop, yend = predict(TapInSLR))) +
labs(y ="Total Tap In Volume", x = "Residental Population") +
theme(panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey", size = (0.5)),
panel.grid.minor = element_line(size = (0.5), colour="grey")) +
geom_smooth(method = "lm", formula = y ~ x, color='black')
TapInSLR_Model
The Model indicates a positive linear relationship between Total Tap In Volume and Residential Population at the Subzone Level.
plot(TapInSLR$fitted.values, TapInSLR$residuals, pch = 8, col = "Orange", xlab = "Predicted Total Tap In Volume", ylab = "Residual Values (Total Tap In Volume)")
Outliers are observed in our regression line, this may be due to areas with train stations where it increases the volume of passengers.
A slight unequal variance of the data is observed at the regression line when plotting out the residuals.
MPSZ_BS_Loc_FilterSZ_RF <- cbind(MPSZ_BS_Loc_FilterSZ, residTapIn = resid(TapInSLR), predictTapIn = predict(TapInSLR), absResidTapIn = abs(resid(TapInSLR)))
MPSZ_BS_Loc_FilterSZ_RF[rowSums(is.na(MPSZ_BS_Loc_FilterSZ_RF))!=0,]
## Simple feature collection with 0 features and 7 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] SUBZONE_N Pop TOTAL_TAP_IN_VOLUME
## [4] TOTAL_TAP_OUT_VOLUME residTapIn predictTapIn
## [7] absResidTapIn geometry
## <0 rows> (or 0-length row.names)
isValid <- st_is_valid(MPSZ_BS_Loc_FilterSZ_RF)
unique(isValid)
## [1] TRUE
TapInSLR_resid <- tm_shape(MPSZ_BS_Loc_FilterSZ_RF)+
tm_fill(col = "residTapIn",
n = 5,
style="jenks",
palette = "Spectral",
title = "Total Tap In Volume Residual") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Total Tap In Volume Residual Distribution",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
TapInSLR_resid
## Variable(s) "residTapIn" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
TapOutSLR <- lm(TOTAL_TAP_OUT_VOLUME ~ Pop , data = MPSZ_BS_Loc_FilterSZ)
TapOutSLR
##
## Call:
## lm(formula = TOTAL_TAP_OUT_VOLUME ~ Pop, data = MPSZ_BS_Loc_FilterSZ)
##
## Coefficients:
## (Intercept) Pop
## 121241.31 20.28
The estimated regression line equation is: TOTAL_TAP_OUT_VOLUME = (20.28*POPULATION) + 121241.31
summary(TapInSLR)
##
## Call:
## lm(formula = TOTAL_TAP_IN_VOLUME ~ Pop, data = MPSZ_BS_Loc_FilterSZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -799481 -127198 -67424 31303 1954431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.207e+05 2.075e+04 5.816 1.52e-08 ***
## Pop 2.038e+01 9.715e-01 20.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 294300 on 305 degrees of freedom
## Multiple R-squared: 0.5906, Adjusted R-squared: 0.5893
## F-statistic: 440 on 1 and 305 DF, p-value: < 2.2e-16
Our p-value for population is at 2e-16 here as well. Again, it tells us that residential population is a significant variable, allowing us to reject the null hypothesis even at an alpha value of 0.001.
Based on the regression equation and our summary table, we can infer a positive correlation between tap in volume and residential population.
FilterSZ_TapInVol <- tm_shape(MPSZ_BS_Loc_FilterSZ)+
tm_fill(col = "TOTAL_TAP_OUT_VOLUME",
n = 5,
style="jenks",
palette = "PuBu",
title = "Subzone Total Tap Out Volume") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Subzone Filtered Total Tap Out Volume",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
FilterSZ_TapInVol
TapOutSLR_Model <- ggplot(data = TapOutSLR, aes(x = Pop, y = TOTAL_TAP_OUT_VOLUME)) +
geom_point(color = "skyblue2", size=2) +
geom_segment(aes(xend = Pop, yend = predict(TapOutSLR))) +
labs(y ="Total Tap Out Volume", x = "Residental Population") +
theme(panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey", size = (0.5)),
panel.grid.minor = element_line(size = (0.5), colour="grey")) +
geom_smooth(method = "lm", formula = y ~ x, color='black')
TapOutSLR_Model
The Model indicates a positive linear relationship between Total Tap Out Volume and Residential Population at the Subzone Level.
plot(TapOutSLR$fitted.values, TapOutSLR$residuals, pch = 8, col = "skyblue2", xlab = "Predicted Total Tap Out Volume", ylab = "Residual Values (Total Tap In Volume)")
Outliers are observed in our regression line, this may be due to areas with train stations where it increases the volume of passengers.
A slight unequal variance of the data is observed at the regression line when plotting out the residuals here as well.
MPSZ_BS_Loc_FilterSZ_RF <- cbind(MPSZ_BS_Loc_FilterSZ_RF, residTapOut = resid(TapOutSLR), predictTapOut = predict(TapOutSLR), absResidTapOut = abs(resid(TapOutSLR)))
MPSZ_BS_Loc_FilterSZ_RF[rowSums(is.na(MPSZ_BS_Loc_FilterSZ_RF))!=0,]
## Simple feature collection with 0 features and 10 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] SUBZONE_N Pop TOTAL_TAP_IN_VOLUME
## [4] TOTAL_TAP_OUT_VOLUME residTapIn predictTapIn
## [7] absResidTapIn residTapOut predictTapOut
## [10] absResidTapOut geometry
## <0 rows> (or 0-length row.names)
isValid <- st_is_valid(MPSZ_BS_Loc_FilterSZ_RF)
unique(isValid)
## [1] TRUE
TapOutSLR_resid <- tm_shape(MPSZ_BS_Loc_FilterSZ_RF)+
tm_fill(col = "residTapOut",
n = 5,
style="jenks",
palette = "Spectral",
title = "Total Tap Our Volume Residual") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Total Tap Out Volume Residual Distribution",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
TapOutSLR_resid
## Variable(s) "residTapOut" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
centroids <- sf::st_centroid(MPSZ_BS_Loc_FilterSZ_RF$geometry)
wm_q <- poly2nb(MPSZ_BS_Loc_FilterSZ_RF, queen=TRUE)
summary(wm_q)
## Neighbour list object:
## Number of regions: 307
## Number of nonzero links: 1862
## Percentage nonzero weights: 1.975618
## Average number of links: 6.065147
## Link number distribution:
##
## 2 3 4 5 6 7 8 9 10 11 12 14 17
## 6 10 28 78 78 49 37 14 2 2 1 1 1
## 6 least connected regions:
## 15 70 174 265 270 287 with 2 links
## 1 most connected region:
## 298 with 17 links
plot(MPSZ_BS_Loc_FilterSZ_RF$geometry, border="lightgrey", main="Queen Contiguity-Based Neighbours", asp=1)
plot(wm_q, st_coordinates(centroids), pch = 5, cex = 0.6, add = TRUE, col= "hotpink2" )
wm_r <- poly2nb(MPSZ_BS_Loc_FilterSZ_RF, queen=FALSE)
summary(wm_r)
## Neighbour list object:
## Number of regions: 307
## Number of nonzero links: 1614
## Percentage nonzero weights: 1.712485
## Average number of links: 5.257329
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 13 14
## 1 5 23 70 92 62 29 17 4 2 1 1
## 1 least connected region:
## 270 with 1 link
## 1 most connected region:
## 298 with 14 links
plot(MPSZ_BS_Loc_FilterSZ_RF$geometry, border="lightgrey", main="Rook Contiguity-Based Neighbours", asp =1)
plot(wm_r, st_coordinates(centroids), pch = 5, cex = 0.6, add = TRUE, col = "brown3")
Queen Contiguity-Based Neighbours is observed to to display a greater number of links at the North, West and East zones, hence the difference of nonzero links between the two. Since busses offten travel between subzones, Queen Contiguity will be the preferred method for our analysis.
coords <- st_coordinates(centroids)
k1 <- knn2nb(knearneigh(coords))
k1dists <- unlist(nbdists(k1, coords, longlat = FALSE))
summary(k1dists)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 182.5 616.3 887.5 937.2 1169.8 5404.1
The summary indicates a large variance in distances between subzones. A possible cause is the difference is sizing of areas such as tuas being much relatively bigger and thus increasing the distance between centroids.
wm_d5405 <- dnearneigh(coords, 0, 5405, longlat = FALSE)
plot(MPSZ_BS_Loc_FilterSZ_RF$geometry, border="lightgrey", main="Fixed Distance-Based Weight Matrix")
plot(wm_d5405, coords, add=TRUE, pch = 19, cex = 0.6, col = "red")
This model indicates that the fixed distance method would not be ideal for our analysis.
###Computing Adaptive Distance Weight Matrix
For adaptive weight distance, we will be using a value of k=6 as is reflected as the queen contiguity’s average number of links.
wm_knn6 <- knn2nb(knearneigh(coords, k=6, longlat = FALSE))
wm_knn6
## Neighbour list object:
## Number of regions: 307
## Number of nonzero links: 1842
## Percentage nonzero weights: 1.954397
## Average number of links: 6
## Non-symmetric neighbours list
plot(MPSZ_BS_Loc_FilterSZ_RF$geometry, border="lightgrey", main= "Adaptive Distance-Based Weight Matrix (Nearest 6 Neighbours)")
plot(wm_knn6, coords, pch = 19, cex = 0.6, add = TRUE, col = "red")
This model indicates that it would be a much preferred useful method than fixed distance.
With the information gathered from comparing the two contiguity and distance methods, Queens contiguity will be preferred here. This is because the maximum number of neighbours indicated by queens contiguity is 17 thus making it not ideal for adaptive distance as it will likely affect our auto-correlation anlysis as we will be overlooking many of the neighbours.
dist <- nbdists(wm_q, coords, longlat = FALSE)
ids <- lapply(dist, function(x) 1/(x))
#ids
rswm_q <- nb2listw(wm_q, style="W", zero.policy = TRUE)
rswm_q
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 307
## Number of nonzero links: 1862
## Percentage nonzero weights: 1.975618
## Average number of links: 6.065147
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 307 94249 307 106.4054 1261.803
head(rswm_q$weights)
## [[1]]
## [1] 0.25 0.25 0.25 0.25
##
## [[2]]
## [1] 0.1111111 0.1111111 0.1111111 0.1111111 0.1111111 0.1111111 0.1111111
## [8] 0.1111111 0.1111111
##
## [[3]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
##
## [[4]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
##
## [[5]]
## [1] 0.2 0.2 0.2 0.2 0.2
##
## [[6]]
## [1] 0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125
moran.test(MPSZ_BS_Loc_FilterSZ_RF$residTapIn, listw=rswm_q, zero.policy = TRUE, na.action=na.omit)
##
## Moran I test under randomisation
##
## data: MPSZ_BS_Loc_FilterSZ_RF$residTapIn
## weights: rswm_q
##
## Moran I statistic standard deviate = -1.0545, p-value = 0.8542
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.037755733 -0.003267974 0.001069537
moran.test(MPSZ_BS_Loc_FilterSZ_RF$residTapOut, listw=rswm_q, zero.policy = TRUE, na.action=na.omit)
##
## Moran I test under randomisation
##
## data: MPSZ_BS_Loc_FilterSZ_RF$residTapOut
## weights: rswm_q
##
## Moran I statistic standard deviate = -0.50383, p-value = 0.6928
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.019809059 -0.003267974 0.001077864
A high p-value is seen for both moran’s I tests with tap in and out. We are thus unable to reject the null hypothesis. Since we are unable to reject the null hypothesis, the Moran’s I statistic here is inadmissible.
This aligns with the relation indicated by the simple linear regression models calibrated above.
set.seed(1234)
TapInMCMI= moran.mc(MPSZ_BS_Loc_FilterSZ_RF$residTapIn, listw=rswm_q, nsim=999, zero.policy = TRUE, na.action=na.omit)
TapInMCMI
##
## Monte-Carlo simulation of Moran I
##
## data: MPSZ_BS_Loc_FilterSZ_RF$residTapIn
## weights: rswm_q
## number of simulations + 1: 1000
##
## statistic = -0.037756, observed rank = 142, p-value = 0.858
## alternative hypothesis: greater
hist(TapInMCMI$res, freq=TRUE, breaks=20, xlab="Simulated Tap In Moran's I")
abline(v=0, col="red")
moran.test(MPSZ_BS_Loc_FilterSZ_RF$residTapIn, listw=rswm_q, zero.policy = TRUE, na.action=na.omit)
##
## Moran I test under randomisation
##
## data: MPSZ_BS_Loc_FilterSZ_RF$residTapIn
## weights: rswm_q
##
## Moran I statistic standard deviate = -1.0545, p-value = 0.8542
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.037755733 -0.003267974 0.001069537
MI_corr_TapIn <- sp.correlogram(wm_q, MPSZ_BS_Loc_FilterSZ_RF$residTapIn, order=6, method="I", style="B", zero.policy = TRUE)
plot(MI_corr_TapIn)
set.seed(1234)
TapOutMCMI= moran.mc(MPSZ_BS_Loc_FilterSZ_RF$residTapOut, listw=rswm_q, nsim=999, zero.policy = TRUE, na.action=na.omit)
TapOutMCMI
##
## Monte-Carlo simulation of Moran I
##
## data: MPSZ_BS_Loc_FilterSZ_RF$residTapOut
## weights: rswm_q
## number of simulations + 1: 1000
##
## statistic = -0.019809, observed rank = 311, p-value = 0.689
## alternative hypothesis: greater
hist(TapOutMCMI$res, freq=TRUE, breaks=20, xlab="Simulated Tap In Moran's I")
abline(v=0, col="red")
moran.test(MPSZ_BS_Loc_FilterSZ_RF$absResidTapOut, listw=rswm_q, zero.policy = TRUE, na.action=na.omit)
##
## Moran I test under randomisation
##
## data: MPSZ_BS_Loc_FilterSZ_RF$absResidTapOut
## weights: rswm_q
##
## Moran I statistic standard deviate = 1.8125, p-value = 0.03495
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.055795360 -0.003267974 0.001061855
plot(TapInSLR$fitted.values, MPSZ_BS_Loc_FilterSZ_RF$absResidTapOut, pch = 19, col = "springgreen4", xlab = "Predicted Total Tap Out Volume", ylab = "Absoluate Total Tap Out Volume Residual")
absResiTapOutMap <- tm_shape(MPSZ_BS_Loc_FilterSZ_RF)+
tm_fill(col = "absResidTapOut",
n = 5,
style="jenks",
palette = "Spectral",
title = "Aboslute Residual Total Tap Out Volume") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Aboslute Residual Total Tap Out Volume Distribution",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08),
legend.title.size = 0.9,
legend.text.size = 0.7)
absResiTapOutMap
### (Tap Out) Compute Moran’s I Correlogram and Plot
MI_corr_TapOut <- sp.correlogram(wm_q, MPSZ_BS_Loc_FilterSZ_RF$residTapOut, order=6, method="I", style="B", zero.policy = TRUE)
plot(MI_corr_TapOut)
All results save for absolute tap out data also indicate that we cannot reject the null hypothesis. Further investigations into absolute tap out data indicate no clear pattern between it and the predicted tap out values and is likely a result of a geographically crowded area of low values.
Since localmoran() is a spdep function, we will need to convert our simple feature dataframe to a spatialpolygonsdataframe
SP_MPSZ_BS_Loc_FilterSZ_RF <- as_Spatial(MPSZ_BS_Loc_FilterSZ_RF)
fips <- order(SP_MPSZ_BS_Loc_FilterSZ_RF$SUBZONE_N)
localMITapIn <- localmoran(SP_MPSZ_BS_Loc_FilterSZ_RF$TOTAL_TAP_IN_VOLUME, rswm_q)
head(localMITapIn)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 1 0.49102287 -0.003267974 0.2349125 1.019834127 0.1539036
## 2 0.07616232 -0.003267974 0.1027717 0.247770333 0.4021561
## 3 0.30976586 -0.003267974 0.1556280 0.793500810 0.2137430
## 4 -0.00276244 -0.003267974 0.1556280 0.001281463 0.4994888
## 5 0.01099226 -0.003267974 0.1873418 0.032946496 0.4868586
## 6 -0.10125705 -0.003267974 0.1159858 -0.287723419 0.6132208
SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn <- cbind(SP_MPSZ_BS_Loc_FilterSZ_RF,localMITapIn)
localMITapInMap <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn) +
tm_fill(col = "Ii",
style = "pretty",
palette = "-Spectral",
title = "Tap-In Local Moran Statistics") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Tap-In Local Moran Statisticsl Distribution",
title.size = 0.7,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08),
legend.title.size = 0.6,
legend.text.size = 0.6)
localMITapInP_ValMap <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn) +
tm_fill(col = "Pr.z...0.",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
palette="-Purples",
title = "Tap-In Local Moran P-Values") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Tap-In Local Moran P-Values Distribution",
title.size = 0.7,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08),
legend.title.size = 0.6,
legend.text.size = 0.6)
tmap_arrange(localMITapInMap, localMITapInP_ValMap, asp=1, ncol=2)
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
The mapping indicates several clusters for Tap-In values in the east area. The mapping also indicates a few outliers with low p values, thus we will scrutinise them in our analysis.
localMITapOut <- localmoran(SP_MPSZ_BS_Loc_FilterSZ_RF$TOTAL_TAP_OUT_VOLUME, rswm_q)
head(localMITapOut)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 1 0.46698843 -0.003267974 0.2352476 0.96955422 0.1661344
## 2 0.12429671 -0.003267974 0.1029157 0.39763959 0.3454479
## 3 0.27726990 -0.003267974 0.1558485 0.71062459 0.2386585
## 4 0.01032487 -0.003267974 0.1558485 0.03443175 0.4862664
## 5 0.01303410 -0.003267974 0.1876081 0.03763717 0.4849885
## 6 -0.19464884 -0.003267974 0.1161489 -0.56155325 0.7127898
SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut <- cbind(SP_MPSZ_BS_Loc_FilterSZ_RF,localMITapOut)
localMITapOutMap <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut) +
tm_fill(col = "Ii",
style = "pretty",
palette="-Spectral",
title = "Tap-Out Local Moran Statistics") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Tap-Out Local Moran Statisticsl Distribution",
title.size = 0.7,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08),
legend.title.size = 0.6,
legend.text.size = 0.6)
localMITapOutP_ValMap <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut) +
tm_fill(col = "Pr.z...0.",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
palette="-Blues",
title = "Tap-Out Local Moran P-Values") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Tap-Out Local Moran P-Values Distribution",
title.size = 0.7,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08),
legend.title.size = 0.6,
legend.text.size = 0.6)
tmap_arrange(localMITapOutMap, localMITapOutP_ValMap, asp=1, ncol=2)
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
The mapping indicates several clusters for Tap-Out values in the east area. The mapping also indicates a few outliers with low p values, thus we will scrutinise them in our analysis.
nciTapIn <- moran.plot(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$TOTAL_TAP_IN_VOLUME, rswm_q, labels=as.character(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$SUBZONE_N), xlab="z Total Tap-In Volumes", ylab="Spatially Lag z Total Tap-In Volumes")
SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$Z.TOTAL_TAP_IN_VOLUME <- scale(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$TOTAL_TAP_IN_VOLUME) %>%
as.vector
nci2TapIn <- moran.plot(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$Z.TOTAL_TAP_IN_VOLUME, rswm_q, labels=as.character(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$SUBZONE_N), xlab="z Total Tap-In Volumes", ylab="Spatially Lag z Total Tap-In Volumes")
nciTapOut <- moran.plot(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$TOTAL_TAP_OUT_VOLUME, rswm_q, labels=as.character(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$SUBZONE_N), xlab="z Total Tap-Out Volumes", ylab="Spatially Lag z Total Tap-Out Volumes")
SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$Z.TOTAL_TAP_OUT_VOLUME <- scale(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$TOTAL_TAP_OUT_VOLUME) %>%
as.vector
nci2TapIn <- moran.plot(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$Z.TOTAL_TAP_OUT_VOLUME, rswm_q, labels=as.character(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$SUBZONE_N), xlab="z Total Tap-Out Volumes", ylab="Spatially Lag z Total Tap-Out Volumes")
quadrantTapIn <- vector(mode="numeric",length=nrow(localMITapIn))
DVTapIn <- SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$TOTAL_TAP_IN_VOLUME - mean(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$TOTAL_TAP_IN_VOLUME)
C_mITapIn <- localMITapIn[,1] - mean(localMITapIn[,1])
signif <- 0.05
quadrantTapIn[DVTapIn >0 & C_mITapIn>0] <- 4
quadrantTapIn[DVTapIn <0 & C_mITapIn<0] <- 1
quadrantTapIn[DVTapIn <0 & C_mITapIn>0] <- 2
quadrantTapIn[DVTapIn >0 & C_mITapIn<0] <- 3
quadrantTapIn[localMITapIn[,5]>signif] <- 0
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("Insignificant", "Low-Low", "Low-High", "High-Low", "High-High")
SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn$quadrantTapIn <- quadrantTapIn
LISA_ClusterMapTapIn <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF_TapIn) +
tm_fill(col = "quadrantTapIn", style = "cat", title="Tap-In Classification", palette = colors[c(sort(unique(quadrantTapIn)))+1], labels = clusters[c(sort(unique(quadrantTapIn)))+1], popup.vars = c("Postal.Code")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Local Moran's I Tap-In LISA Cluster Map",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
LISA_ClusterMapTapIn
The mapping indicates the same area as indicated by our mapping done for Moran I and p values for Tap In volume. The mapping also indicates positive auto-correlation with the existence of other clusters in the north and west areas.
quadrantTapOut <- vector(mode="numeric",length=nrow(localMITapOut))
DVTapOut <- SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$TOTAL_TAP_OUT_VOLUME - mean(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$TOTAL_TAP_OUT_VOLUME)
C_mITapOut <- localMITapOut[,1] - mean(localMITapOut[,1])
signif <- 0.05
quadrantTapOut[DVTapOut >0 & C_mITapOut>0] <- 4
quadrantTapOut[DVTapOut <0 & C_mITapOut<0] <- 1
quadrantTapOut[DVTapOut <0 & C_mITapOut>0] <- 2
quadrantTapOut[DVTapOut >0 & C_mITapOut<0] <- 3
quadrantTapOut[localMITapOut[,5]>signif] <- 0
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("Insignificant", "Low-Low", "Low-High", "High-Low", "High-High")
SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut$quadrantTapOut <- quadrantTapOut
LISA_ClusterMapTapOut <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF_TapOut) +
tm_fill(col = "quadrantTapOut", style = "cat", title="Tap-Out Classification", palette = colors[c(sort(unique(quadrantTapOut)))+1], labels = clusters[c(sort(unique(quadrantTapOut)))+1], popup.vars = c("Postal.Code")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Local Moran's I Tap-Out LISA Cluster Map",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
LISA_ClusterMapTapOut
The mapping indicates the same area as indicated by our mapping done for Moran I and p values for Tap Out volume. The mapping also indicates positive auto-correlation with the existence of other clusters in the north and west areas.
Since we’ve determined fixed distance matrix is less preferred to adaptive distance, we will omnit fixed distance testing here.
wm_knn6_lw <- nb2listw(wm_knn6, style = 'B')
gi.adaptiveTapIn <- localG(SP_MPSZ_BS_Loc_FilterSZ_RF$TOTAL_TAP_IN_VOLUME, wm_knn6_lw)
SP_MPSZ_BS_Loc_FilterSZ_RF.giTapIn <- cbind(SP_MPSZ_BS_Loc_FilterSZ_RF, as.matrix(gi.adaptiveTapIn))
names(SP_MPSZ_BS_Loc_FilterSZ_RF.giTapIn)[11] <- "gstat_adaptive"
giTapInMap <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF.giTapIn) +
tm_fill(col = "gstat_adaptive",
style = "pretty",
palette = "-RdBu",
n=5,
title = "Tap-In local Gi") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Tap-In local Gi Statistic Map",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
giTapInMap
## Variable(s) "gstat_adaptive" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
This mapping displays a hot spot areas as indicated by our LISA clustering map for Tap-In volume.
gi.adaptiveTapOut <- localG(SP_MPSZ_BS_Loc_FilterSZ_RF$TOTAL_TAP_OUT_VOLUME, wm_knn6_lw)
SP_MPSZ_BS_Loc_FilterSZ_RF.giTapOut <- cbind(SP_MPSZ_BS_Loc_FilterSZ_RF, as.matrix(gi.adaptiveTapOut))
names(SP_MPSZ_BS_Loc_FilterSZ_RF.giTapOut)[11] <- "gstat_adaptive"
giTapOutMap <- tm_shape(SP_MPSZ_BS_Loc_FilterSZ_RF.giTapOut) +
tm_fill(col = "gstat_adaptive",
style = "pretty",
palette = "-RdBu",
n=5,
title = "Tap-Out local Gi") +
tm_borders(alpha = 0.5) +
tm_credits("Source: Bus Stop Location Data and Passenger Volume by Busstop from Land Transport Authority (LTA), Master Plan Subzone Boundary from Urban Redevelopment Authority (URA)", position = c("left", "bottom"))+
tm_layout(title = "Tap-Out local Gi Statistic Map",
title.size = 1,
title.position = c("center", "top"),
inner.margins = c(0.07, 0.10, 0.10, 0.08))
giTapOutMap
## Variable(s) "gstat_adaptive" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
This mapping displays a hot spot areas as indicated by our LISA clustering map for Tap-Out volume.
The mappings done on Tap In / Out volume data indicate an observation of a cluster in the east area, especially in the Tampines area which lines up with earlier mappings indicating tampines_east subzone and its neigbours as an area with a higher population count relative to the other subzones.