Part 2 of Take-home Exercise 1: https://rpubs.com/keithlohyj/IS415_TH_Ex01_Part2
Overview: Geographic Analysis of the Supply and Demand of Childcare Services in Singapore in 2017 and 2020.
packages = c('rgdal', 'spatstat', 'raster', 'maptools', 'sf', 'tidyverse', 'tmap', 'knitr', 'tmaptools', 'OpenStreetMap', 'ggthemes','ggplot2','plotly')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
Dataset provided by Professor Kam in Hands-on_Ex04
childcare_2017 <- st_read(dsn = "data/childcare_2017", layer = "CHILDCARE")
## Reading layer `CHILDCARE' from data source `D:\School\Year 4 Sem 1\IS415 - Geospatial Analytics & Applns (SMU-X)\Assignment\Take-home_Ex01\data\childcare_2017' using driver `ESRI Shapefile'
## Simple feature collection with 1312 features and 18 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 11203.01 ymin: 25667.6 xmax: 45404.24 ymax: 49300.88
## projected CRS: SVY21
st_crs(childcare_2017)
## Coordinate Reference System:
## User input: SVY21
## wkt:
## PROJCRS["SVY21",
## BASEGEOGCRS["SVY21[WGS84]",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6326]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
EPSG is 9001, assign EPSG 3414 to childcare_2017
childcare_2017 <- st_set_crs(childcare_2017, 3414)
st_crs(childcare_2017)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
head(childcare_2017, 1)
## Simple feature collection with 1 feature and 18 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 34246.67 ymin: 33141.02 xmax: 34246.67 ymax: 33141.02
## projected CRS: SVY21 / Singapore TM
## OBJECTID ADDRESSBLO ADDRESSBUI ADDRESSPOS
## 1 1 <NA> <NA> 387908
## ADDRESSSTR ADDRESSTYP DESCRIPTIO
## 1 11 LORONG 37 GEYLANG SINGAPORE 387908 <NA> Child Care Services
## HYPERLINK
## 1 http://www.childcarelink.gov.sg/ccls/chdcentpart/ChdCentPartLnk.jsp?centreCd=PT8785
## LANDXADDRE LANDYADDRE NAME PHOTOURL ADDRESSFLO
## 1 0 0 FIRST JUNIOR PRESCHOOL <NA> <NA>
## INC_CRC FMEL_UPD_D ADDRESSUNI X_ADDR Y_ADDR
## 1 45DBE80EB321A9B5 2016-12-23 <NA> 34246.67 33141.02
## geometry
## 1 POINT (34246.67 33141.02)
#glimpse(childcare_2017)
The columns selected are information that could be used to uniquely identify the childcare services when combining data with other datasets.
childcare_2017 <- childcare_2017 %>%
dplyr::select(OBJECTID, ADDRESSPOS, ADDRESSSTR, NAME, geometry)
childcare_2017[rowSums(is.na(childcare_2017))!=0,]
## Simple feature collection with 0 features and 4 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID ADDRESSPOS ADDRESSSTR NAME geometry
## <0 rows> (or 0-length row.names)
There are no NA values in the childcare_2017 dataset.
Check for duplicated childcare services entry with the same Postal Code, Address, and Childcare Name. It would not be logical for different childcare centres to have the exact 3 attributes.
str(childcare_2017[duplicated(childcare_2017[,c('ADDRESSPOS', 'ADDRESSSTR', 'NAME')]), ])
## Classes 'sf' and 'data.frame': 0 obs. of 5 variables:
## $ OBJECTID : int
## $ ADDRESSPOS: chr
## $ ADDRESSSTR: chr
## $ NAME : chr
## $ geometry :sfc_GEOMETRY of length 0 - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
## ..- attr(*, "names")= chr [1:4] "OBJECTID" "ADDRESSPOS" "ADDRESSSTR" "NAME"
There are no duplicated entry in the childcare_2017 dataset.
childcare_2017_validity_test <- st_is_valid(childcare_2017)
length(which(childcare_2017_validity_test == FALSE))
## [1] 0
There are no invalid geometries. A quick plot to look at the output of childcare_2017
plot(childcare_2017$geometry)
Import the pre-schools location kml file that is downloaded from Singapore’s open data portal on 16 September 2020. Dataset last updated on 29 August 2020.
childcare_2020 <- st_read("data/childcare_2020/child-care-services-kml.kml")
## Reading layer `CHILDCARE' from data source `D:\School\Year 4 Sem 1\IS415 - Geospatial Analytics & Applns (SMU-X)\Assignment\Take-home_Ex01\data\childcare_2020\child-care-services-kml.kml' using driver `KML'
## Simple feature collection with 1545 features and 2 fields
## geometry type: POINT
## dimension: XYZ
## bbox: xmin: 103.6824 ymin: 1.248403 xmax: 103.9897 ymax: 1.462134
## z_range: zmin: 0 zmax: 0
## geographic CRS: WGS 84
st_crs(childcare_2020)
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
Transform from WGS 84 EPSG 4326 to EPSG 3414
childcare_2020 <- st_transform(childcare_2020, 3414)
st_crs(childcare_2020)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
head(childcare_2020, 1)
## Simple feature collection with 1 feature and 2 fields
## geometry type: POINT
## dimension: XYZ
## bbox: xmin: 27976.73 ymin: 45716.7 xmax: 27976.73 ymax: 45716.7
## z_range: zmin: 0 zmax: 0
## projected CRS: SVY21 / Singapore TM
## Name
## 1 kml_1
## Description
## 1 <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>ADDRESSBLOCKHOUSENUMBER</th> <td></td> </tr><tr bgcolor=""> <th>ADDRESSBUILDINGNAME</th> <td></td> </tr><tr bgcolor="#E3E3F3"> <th>ADDRESSPOSTALCODE</th> <td>760742</td> </tr><tr bgcolor=""> <th>ADDRESSSTREETNAME</th> <td>742, YISHUN AVENUE 5, #01 - 470, SINGAPORE 760742</td> </tr><tr bgcolor="#E3E3F3"> <th>ADDRESSTYPE</th> <td></td> </tr><tr bgcolor=""> <th>DESCRIPTION</th> <td>Child Care Services</td> </tr><tr bgcolor="#E3E3F3"> <th>HYPERLINK</th> <td></td> </tr><tr bgcolor=""> <th>LANDXADDRESSPOINT</th> <td>0</td> </tr><tr bgcolor="#E3E3F3"> <th>LANDYADDRESSPOINT</th> <td>0</td> </tr><tr bgcolor=""> <th>NAME</th> <td>AVERBEL CHILD DEVELOPMENT CENTRE PTE LTD</td> </tr><tr bgcolor="#E3E3F3"> <th>PHOTOURL</th> <td></td> </tr><tr bgcolor=""> <th>ADDRESSFLOORNUMBER</th> <td></td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>AEA27114446235CE</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20200826094036</td> </tr><tr bgcolor="#E3E3F3"> <th>ADDRESSUNITNUMBER</th> <td></td> </tr></table></center>
## geometry
## 1 POINT Z (27976.73 45716.7 0)
# glimpse(childcare_2020)
Description variable seems to be a html string, there are some information within that could be of use for the analysis. Extract the information from the html string using gsub() and str_match(), and store them in their own columns. Information extracted: “ADDRESSPOS”, “ADDRESSSTR”, “NAME” (naming convention to match childcare_2017 columns). These are information that could be used to uniquely identify the childcare services when combining data with other datasets.
removeAllHtmlTagsFn <- function(htmlString) {
return(gsub("<.*?>", "", htmlString))
}
extractChildcarePostalCodeFn <- function(inputString) {
return(str_match(inputString, "\\sADDRESSPOSTALCODE\\s*(.*?)\\s*ADDRESSSTREETNAME")[,2])
}
extractChildcareAddressFn <- function(inputString) {
return(str_match(inputString, "\\sADDRESSSTREETNAME\\s*(.*?)\\s*ADDRESSTYPE")[,2])
}
extractChildcareNameFn <- function(inputString) {
return(str_match(inputString, "\\sNAME\\s*(.*?)\\s*PHOTOURL")[,2])
}
childcare_2020 <- childcare_2020 %>%
rename(OBJECTID = Name) %>% # rename Name to OBJECTID to avoid confusion
mutate(`No_Tag_Description` = removeAllHtmlTagsFn(childcare_2020$Description)) %>%
mutate(`ADDRESSPOS` = extractChildcarePostalCodeFn(No_Tag_Description)) %>%
mutate(`ADDRESSSTR` = extractChildcareAddressFn(No_Tag_Description)) %>%
mutate(`NAME` = extractChildcareNameFn(No_Tag_Description)) %>%
dplyr::select(-Description, -No_Tag_Description) #remove variables that are no longer needed
head(childcare_2020, 1)
## Simple feature collection with 1 feature and 4 fields
## geometry type: POINT
## dimension: XYZ
## bbox: xmin: 27976.73 ymin: 45716.7 xmax: 27976.73 ymax: 45716.7
## z_range: zmin: 0 zmax: 0
## projected CRS: SVY21 / Singapore TM
## OBJECTID ADDRESSPOS ADDRESSSTR
## 1 kml_1 760742 742, YISHUN AVENUE 5, #01 - 470, SINGAPORE 760742
## NAME geometry
## 1 AVERBEL CHILD DEVELOPMENT CENTRE PTE LTD POINT Z (27976.73 45716.7 0)
childcare_2020[rowSums(is.na(childcare_2020))!=0,]
## Simple feature collection with 0 features and 4 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID ADDRESSPOS ADDRESSSTR NAME geometry
## <0 rows> (or 0-length row.names)
There are no NA values in the childcare_2020 dataset.
Check for duplicated childcare services entry with the same Postal Code, Address, and Childcare Name. It would not be logical for different childcare centres to have the exact 3 attributes.
str(childcare_2020[duplicated(childcare_2020[,c('ADDRESSPOS', 'ADDRESSSTR', 'NAME')]), ])
## Classes 'sf' and 'data.frame': 0 obs. of 5 variables:
## $ OBJECTID : chr
## $ ADDRESSPOS: chr
## $ ADDRESSSTR: chr
## $ NAME : chr
## $ geometry :sfc_GEOMETRY of length 0 - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
## ..- attr(*, "names")= chr [1:4] "OBJECTID" "ADDRESSPOS" "ADDRESSSTR" "NAME"
There are no duplicated entry in the childcare_2020 dataset.
childcare_2020_validity_test <- st_is_valid(childcare_2020)
length(which(childcare_2020_validity_test == FALSE))
## [1] 0
There are no invalid geometries. A quick plot to look at the output of childcare_2020.
plot(childcare_2020$geometry)
Import the listing of centres csv file that is downloaded from Singapore’s open data portal on 16 September 2020. Dataset last updated on 16 September 2020.
centre_listing <- st_read("data/listing-of-centres/listing-of-centres.csv")
## Reading layer `listing-of-centres' from data source `D:\School\Year 4 Sem 1\IS415 - Geospatial Analytics & Applns (SMU-X)\Assignment\Take-home_Ex01\data\listing-of-centres\listing-of-centres.csv' using driver `CSV'
Select columns that can uniquely identify a centre and also the organisation_description that will be able to tell us if the services are provided by private operators or government-linked companies.
centre_listing <- centre_listing %>%
select(centre_name, organisation_code, organisation_description, centre_address, postal_code)
Standardize the format of the childcare centres’ names and addresses as different datasets have different formats which will complicate combining of datasets.
formatNameFn <- function(inputString){
inputString <-toupper(inputString)
inputString <- gsub("[[:punct:][:blank:]]+", "",inputString)
return(inputString)
}
formatAddressFn <- function(inputString){
inputString <-toupper(inputString)
inputString <- str_replace(inputString, "BLK", "")
inputString <- str_replace(inputString, "SINGAPORE", "")
inputString <- gsub("[[:punct:][:blank:]]+", "",inputString)
return(inputString)
}
childcare_2017 <- childcare_2017 %>%
mutate(formatted_address = formatAddressFn(childcare_2017$ADDRESSSTR)) %>%
mutate(formatted_name = formatNameFn(childcare_2017$NAME))
childcare_2020 <- childcare_2020 %>%
mutate(formatted_address = formatAddressFn(childcare_2020$ADDRESSSTR)) %>%
mutate(formatted_name = formatNameFn(childcare_2020$NAME))
centre_listing <- centre_listing %>%
mutate(formatted_address = formatAddressFn(centre_listing$centre_address)) %>%
mutate(formatted_name = formatNameFn(centre_listing$centre_name))
Check for duplicates, especially if the postal code, name, and address (#unit number) are the same
(centre_listing[duplicated(centre_listing[,c('centre_address', 'postal_code', 'centre_name')]), ])
## centre_name organisation_code
## 1902 PCF Sparkletots Preschool @ Keat Hong Blk 801 (DS) ST
## organisation_description centre_address postal_code
## 1902 PAP Community Foundation 801,Keat Hong Close,#01-01,680801 680801
## formatted_address formatted_name
## 1902 801KEATHONGCLOSE0101680801 PCFSPARKLETOTSPRESCHOOLKEATHONGBLK801DS
There is 1 duplicated row, which might cause issues when combining dataset. As such, we shall exclude the exact duplicate from the centre_listing dataset. (From 1906 observations to 1905 observation)
centre_listing <- centre_listing[!duplicated(centre_listing[,c('centre_address', 'postal_code', 'centre_name')]), ]
Combine the information from the centre_listing datasets to the childcare_2017 and childcare_2020 datasets.
The only common identifier of the childcare centres between the centre_listing dataset and the childcare datasets are the centre’s name, address, and postal code variables.
The join condition shall be the centre’s name and postal code only as the centre’s address might still be of different format even after the formatting done above (For instance, Blk 03 vs Blk 3). Having only the 2 selected join conditions seems to be optimal as the number of rows with NAs is lower than when there are 3 join conditions. (From 419 rows to 351 rows in the 2017 dataset and from 211 rows to 38 rows in the 2020 dataset). Joining by postal code or centre name alone is not ideal as there could be multiple childcare centres with the same values.
childcare_2017 <- left_join(childcare_2017, centre_listing, by=c('ADDRESSPOS' = 'postal_code', 'formatted_name' = 'formatted_name'))
childcare_2017 <- childcare_2017 %>%
select(ADDRESSPOS, ADDRESSSTR, NAME, organisation_code, organisation_description)
childcare_2017[rowSums(is.na(childcare_2017))!=0,]
## Simple feature collection with 351 features and 5 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 11810.03 ymin: 28400.38 xmax: 42700.62 ymax: 48696.35
## projected CRS: SVY21 / Singapore TM
## First 10 features:
## ADDRESSPOS
## 1 387908
## 4 520114
## 7 159837
## 8 129040
## 9 787013
## 10 680229
## 11 599057
## 21 529763
## 22 389805
## 37 545270
## ADDRESSSTR
## 1 11 LORONG 37 GEYLANG SINGAPORE 387908
## 4 Blk 114 SIMEI STREET 1 #01 - 614 SINGAPORE 520114
## 7 3500A BUKIT MERAH CENTRAL BUKIT MERAH SWIMMING COMPLEX SINGAPORE 159837
## 8 60 FABER TERRACE FABER HILLS SINGAPORE 129040
## 9 1 Springside Avenue SINGAPORE 787013
## 10 Blk 229 CHOA CHU KANG CENTRAL #01 - 139 SINGAPORE 680229
## 11 64 ENG KONG ROAD SINGAPORE 599057
## 21 45 Tampines Avenue 1 #01 - 05 Temasek Green SINGAPORE 529763
## 22 20 ALJUNIED ROAD #02 - 01/02 CPA HOUSE SINGAPORE 389805
## 37 2 ST ANNE'S WOOD SINGAPORE 545270
## NAME organisation_code
## 1 FIRST JUNIOR PRESCHOOL <NA>
## 4 DISCOVERY WHIZ KIDZ EMPIRE PTE. LTD. <NA>
## 7 EAGER BEAVER SCHOOLHOUSE 2 PTE. LTD. <NA>
## 8 EAGER BEAVER SCHOOLHOUSE 3 PTE. LTD. <NA>
## 9 EAGLE RETIREMENT PLANNERS PTE LTD <NA>
## 10 EARLY LEARNING CENTRE PTE LTD <NA>
## 11 EARLY LEARNING'S FUN PRESCHOOL PTE. LTD. <NA>
## 21 BRIGHT JUNIORS PTE. LTD. <NA>
## 22 THE LITTLE CHAMPIONS (TLC) PTE. LTD. <NA>
## 37 AMAZING STAR MONTESSORI HOUSE (SK) <NA>
## organisation_description geometry
## 1 <NA> POINT (34246.67 33141.02)
## 4 <NA> POINT (41034.07 36221.83)
## 7 <NA> POINT (26303.87 29388.7)
## 8 <NA> POINT (19550.92 33770.18)
## 9 <NA> POINT (26398.5 42776.08)
## 10 <NA> POINT (18323.41 40291.37)
## 11 <NA> POINT (20716.78 35359.81)
## 21 <NA> POINT (38556 36614.36)
## 22 <NA> POINT (33435.31 32962.05)
## 37 <NA> POINT (35787.8 41421.12)
There are 351 rows that contains NA values. The childcare centres in the 2017 dataset might have been renamed or have a different postal code over the years. We shall assign “Unknown” to the NA values in the organisation code and organisation description columns.
childcare_2017$organisation_code[is.na(childcare_2017$organisation_code)] <- "Unknown"
childcare_2017$organisation_description[is.na(childcare_2017$organisation_description)] <- "Unknown"
childcare_2017[rowSums(is.na(childcare_2017))!=0,]
## Simple feature collection with 0 features and 5 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] ADDRESSPOS ADDRESSSTR NAME
## [4] organisation_code organisation_description geometry
## <0 rows> (or 0-length row.names)
The NA values in childcare_2017 are taken care of.
Next, get the number of each unique organisation type. “NTUC First Campus Co-Operative Ltd” and “PAP Community Foundation” shall be further categorised as Government-linked, Unknown organisation type will remain as Unknown, and the rest shall be further categorised as Private.
organisationCategoryFn <- function(inputOrganisation){
if(inputOrganisation %in% c("NTUC First Campus Co-Operative Ltd", "PAP Community Foundation")){
return("Government-linked")
}
else if(inputOrganisation == "Unknown"){
return("Unknown")
}
else{
return("Private")
}
}
childcare_2017 <- childcare_2017 %>%
group_by(organisation_code, organisation_description) %>%
mutate(`organisation_type` = organisationCategoryFn(organisation_description)) %>%
ungroup()
Repeat the steps for the 2020 dataset.
childcare_2020 <- left_join(childcare_2020, centre_listing, by=c('ADDRESSPOS' = 'postal_code', 'formatted_name' = 'formatted_name'))
childcare_2020 <- childcare_2020 %>%
select(ADDRESSPOS, ADDRESSSTR, NAME, organisation_code, organisation_description)
childcare_2020[rowSums(is.na(childcare_2020))!=0,]
## Simple feature collection with 38 features and 5 fields
## geometry type: POINT
## dimension: XYZ
## bbox: xmin: 12328.09 ymin: 28008.54 xmax: 40545.64 ymax: 45687.11
## z_range: zmin: 0 zmax: 0
## projected CRS: SVY21 / Singapore TM
## First 10 features:
## ADDRESSPOS
## 147 309975
## 183 613140
## 240 339351
## 275 118283
## 380 425500
## 415 229492
## 478 069533
## 486 588406
## 489 159837
## 509 530565
## ADDRESSSTR
## 147 11 Akyab Road, Singapore S(309975)
## 183 140C, CORPORATION DRIVE, #01 - 68, SINGAPORE 613140
## 240 535, KALLANG BAHRU, #03 - 01, SINGAPORE 339351
## 275 4A, RUSSELS ROAD, SINGAPORE 118283
## 380 55, Lorong L Telok Kurau, #03 - 57, Bright Centre, SINGAPORE 425500
## 415 420, CLEMENCEAU AVENUE NORTH, #01-02, SINGAPORE 229492
## 478 101, Cecil Street, #01 - 02, Tong Eng Building, SINGAPORE 069533
## 486 131, RIFLE RANGE ROAD, #03 - 01, TEMASEK CLUB, SINGAPORE 588406
## 489 2 Jalan Kilang Barat #01-01, Singapore 159346
## 509 565, HOUGANG STREET 51, #01 - 480, SINGAPORE 530565
## NAME organisation_code
## 147 Children’s Cove Preschool Pte Ltd <NA>
## 183 E-BRIDGE PRE-SCHOOL PTE LTD <NA>
## 240 GREENLAND CHILDCARE PTE. LTD. <NA>
## 275 CANTERBURY BABY COVE PTE. LTD. <NA>
## 380 LITTLE FELLOW CHILDCARE <NA>
## 415 KIDDIWINKIE SCHOOLHOUSE PTE. LTD. <NA>
## 478 LEARNING VISION @ WORK PTE. LTD. <NA>
## 486 LEARNING VISION @ WORK PTE. LTD. <NA>
## 489 EAGER BEAVER SCHOOLHOUSE 2 PTE. LTD. <NA>
## 509 IYAD-PERDAUS NURTURE CARE <NA>
## organisation_description geometry
## 147 <NA> POINT Z (29661.85 34014.84 0)
## 183 <NA> POINT Z (15865.95 34418.12 0)
## 240 <NA> POINT Z (32164.22 33727.81 0)
## 275 <NA> POINT Z (23876.55 29708.14 0)
## 380 <NA> POINT Z (36595.93 32680.1 0)
## 415 <NA> POINT Z (28732.54 32522.43 0)
## 478 <NA> POINT Z (29788.9 29268.26 0)
## 486 <NA> POINT Z (22151.28 36502.8 0)
## 489 <NA> POINT Z (26303.87 29388.7 0)
## 509 <NA> POINT Z (34080.15 40276.28 0)
There are 38 rows that contains NA values. The could be due to the different naming convention of childcare centres in the centre_listing and childcare_2020 dataset. We shall assign “Unknown” to the NA values in the organisation code and organisation description columns.
childcare_2020$organisation_code[is.na(childcare_2020$organisation_code)] <- "Unknown"
childcare_2020$organisation_description[is.na(childcare_2020$organisation_description)] <- "Unknown"
childcare_2020[rowSums(is.na(childcare_2020))!=0,]
## Simple feature collection with 0 features and 5 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] ADDRESSPOS ADDRESSSTR NAME
## [4] organisation_code organisation_description geometry
## <0 rows> (or 0-length row.names)
The NA values in childcare_2020 are taken care of.
Generate organisation type:
childcare_2020 <- childcare_2020 %>%
group_by(organisation_code, organisation_description) %>%
mutate(`organisation_type` = organisationCategoryFn(organisation_description)) %>%
ungroup()
Import the Master Plan 2014 Subzone Boundary (Web) shapefile dataset that is downloaded from Singapore’s open data portal on 16 September 2020. Dataset last updated on 4 November 2016.
mpsz <- st_read(dsn = "data/mpsz2014", layer = "MP14_SUBZONE_WEB_PL")
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `D:\School\Year 4 Sem 1\IS415 - Geospatial Analytics & Applns (SMU-X)\Assignment\Take-home_Ex01\data\mpsz2014' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## projected CRS: SVY21
st_crs(mpsz)
## Coordinate Reference System:
## User input: SVY21
## wkt:
## PROJCRS["SVY21",
## BASEGEOGCRS["SVY21[WGS84]",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6326]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
EPSG is 9001, assign EPSG 3414 to chidcare_2017
mpsz <- st_set_crs(mpsz, 3414)
st_crs(mpsz)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
mpsz[rowSums(is.na(mpsz))!=0,]
## Simple feature collection with 0 features and 15 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND PLN_AREA_N
## [7] PLN_AREA_C REGION_N REGION_C INC_CRC FMEL_UPD_D X_ADDR
## [13] Y_ADDR SHAPE_Leng SHAPE_Area geometry
## <0 rows> (or 0-length row.names)
There are no NA values.
mpsz_validity_test <- st_is_valid(mpsz)
length(which(mpsz_validity_test == FALSE))
## [1] 9
There are 9 invalid geometries, and these are the reasons:
mpsz_validity_test_reason <- st_is_valid(mpsz, reason = TRUE)
mpsz_validity_test_reason[which(mpsz_validity_test_reason != "Valid Geometry")]
## [1] "Ring Self-intersection[27932.3925999999 21982.7971999999]"
## [2] "Ring Self-intersection[26885.4439000003 26668.3121000007]"
## [3] "Ring Self-intersection[26920.1689999998 26978.5440999996]"
## [4] "Ring Self-intersection[15432.4749999996 31319.716]"
## [5] "Ring Self-intersection[12861.3828999996 32207.4923]"
## [6] "Ring Self-intersection[19681.2353999997 31294.4521999992]"
## [7] "Ring Self-intersection[41375.108 40432.8588999994]"
## [8] "Ring Self-intersection[38542.2260999996 44605.4089000002]"
## [9] "Ring Self-intersection[21702.5623000003 48125.1154999994]"
mpsz<- st_make_valid(mpsz)
mpsz_validity_test <- st_is_valid(mpsz)
length(which(mpsz_validity_test == FALSE))
## [1] 0
There are no more invalid geometries in the dataset. A quick plot to look at the output of mpsz.
plot(mpsz$geometry)
Import the csv that is downloaded from Singapore Department of Statistics > Population Trends > Singapore Residents by Planning AreaSubzone, Age Group, Sex and Type of Dwelling, June 2011-2019 on 16 September 2020.
popdata <- read_csv("data/respopagesextod2011to2019/respopagesextod2011to2019.csv")
As we are looking at childcare services, the age group that are of interest are those between 18 months and below 7 years old. (Information from Early Childhood Development Agency Website).
Although 18 months is more than 1 year old, we shall assume that they will turn 18 months in the same year and include the entire population of 1 year olds. Assuming that the individual ages in the age group in the population dataset are evenly distributed, we shall extract the population size of ages 1 to 4 from the 0_to_4 age group, and ages 5 to 6 from the 5_to_9 age group to form the demand of childcare services for 2017 and 2020 respectively at the planning subzone level.
popdata2017 <- popdata %>%
filter(Time == 2017) %>%
group_by(PA,SZ,AG) %>%
summarize(`POP` = sum(`Pop`)) %>%
ungroup() %>%
spread(AG, POP) %>%
dplyr::select(PA, SZ, `0_to_4`, `5_to_9`) %>%
mutate(`1_to_4` = 4/5*`0_to_4`) %>%
mutate(`5_to_6` = 2/5*`5_to_9`) %>%
mutate(`Age_1_to_6_pop` = rowSums(.[5:6])) %>%
dplyr::select(PA, SZ, Age_1_to_6_pop) %>%
mutate_at(.vars = vars(PA, SZ), .funs = funs(toupper))
popdata2020 <- popdata %>%
filter(Time == 2019) %>%
group_by(PA,SZ,AG) %>%
summarize(`POP` = sum(`Pop`)) %>%
ungroup() %>%
spread(AG, POP) %>%
dplyr::select(PA, SZ, `0_to_4`, `5_to_9`) %>%
mutate(`1_to_4` = 4/5*`0_to_4`) %>%
mutate(`5_to_6` = 2/5*`5_to_9`) %>%
mutate(`Age_1_to_6_pop` = rowSums(.[5:6])) %>%
dplyr::select(PA, SZ, Age_1_to_6_pop) %>%
mutate_at(.vars = vars(PA, SZ), .funs = funs(toupper))
mpszpop2017 <- left_join(mpsz, popdata2017, by = c("SUBZONE_N" = "SZ"))
mpszpop2020 <- left_join(mpsz, popdata2020, by = c("SUBZONE_N" = "SZ"))
mpszpop2017[rowSums(is.na(mpszpop2017))!=0,]
## Simple feature collection with 0 features and 17 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND
## [6] PLN_AREA_N PLN_AREA_C REGION_N REGION_C INC_CRC
## [11] FMEL_UPD_D X_ADDR Y_ADDR SHAPE_Leng SHAPE_Area
## [16] PA Age_1_to_6_pop geometry
## <0 rows> (or 0-length row.names)
There are no NA values in mpszpop2017.
mpszpop2020[rowSums(is.na(mpszpop2020))!=0,]
## Simple feature collection with 0 features and 17 fields
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## projected CRS: SVY21 / Singapore TM
## [1] OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND
## [6] PLN_AREA_N PLN_AREA_C REGION_N REGION_C INC_CRC
## [11] FMEL_UPD_D X_ADDR Y_ADDR SHAPE_Leng SHAPE_Area
## [16] PA Age_1_to_6_pop geometry
## <0 rows> (or 0-length row.names)
There are no NA values in mpszpop2020.
tm_shape(mpszpop2017)+
tm_fill(col="Age_1_to_6_pop", style="equal", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
The plot above shows the population of 1 to 6 year olds at each planning subzone in Singapore for 2017. However, subzones with a larger area/size typically tends have to have a higher population count. As such, to make the comparison among subzones we have to remove the influence of the subzone area. Using the SHAPE_Area column that contains area of planning subzones, we calculate the population density which is the population of 1 to 6 year olds per km^2 in each subzone.
# mpszpop2017$Area <- mpszpop2017 %>%
# st_area()
mpszpop2017 <- mpszpop2017 %>%
mutate(pop_density = (Age_1_to_6_pop/SHAPE_Area)*1000000)
ggplot(mpszpop2017, aes(pop_density)) +
geom_histogram() +
theme_classic() +
labs(y= "Frequency", x = "Population density per km^2 of 1 to 6 year olds in subzones")
ggplot(mpszpop2017, aes(y = Age_1_to_6_pop, x= pop_density)) +
geom_point() +
theme_classic() +
labs(y= "Population of 1 to 6 year olds in subzones", x = "Population density per km^2 of 1 to 6 year olds in subzones")
summary(mpszpop2017$pop_density)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 283.2 602.2 943.3 4233.3
tm_shape(mpszpop2017)+
tm_fill(col="pop_density", style="equal", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
Based on the plots above, out of the 323 planning subzones, majority (75% of planning subzones) have a population density that is 943.3 per km^2 and below. There appears to be clusters of subzones with high population density in the North-East Region and West Region of Singapore. There are 2 subzones with values that are in the highest range, Matilda has a population density of 4233.2840 per km^2 while Waterway East has 3701.1996 per km^2. Both of the subzones are under the Punggol Planning Area and in the North-East Region. There are multiple subzones with 0 population density.
mpszpop2017$`childcare_count` <- lengths(st_intersects(mpszpop2017, childcare_2017))
Childcare centres in 2017:
tm_shape(mpszpop2017) +
tm_polygons(col="white") +
tm_shape(childcare_2017) +
tm_dots(col="black", size=0.1, alpha=0.3) +
tmap_style("white")
There appears to be clusters of childcare centres at multiple subzones, especially those at the North-East, North, and West Regions. There are also many subzones with very little or no childcare centres.
tm_shape(mpszpop2017)+
tm_fill(col="childcare_count", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
The plot above shows the number of childcare centres at each planning subzone in Singapore for 2017. Similarly, subzones with a larger area/size typically tends have to have a higher childcare count. We shall look at the childcare centre density instead, which is the number of childcare centres per km^2 in each subzone.
mpszpop2017 <- mpszpop2017 %>%
mutate(childcare_density = (childcare_count/SHAPE_Area)*1000000)
ggplot(mpszpop2017, aes(childcare_density)) +
geom_histogram() +
theme_classic() +
labs(y= "Frequency", x = "Childcare centres density per km^2 in subzones")
ggplot(mpszpop2017, aes(y = childcare_count, x= childcare_density)) +
geom_point() +
theme_classic() +
labs(y= "Number of childcare centres in subzones", x = "Childcare centres density per km^2 in subzones")
summary(mpszpop2017$childcare_density)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 2.884 3.908 6.483 27.945
tm_shape(mpszpop2017)+
tm_fill(col="childcare_density", style="equal", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
Based on the plots above, out of the 323 planning subzones, majority (75% of planning subzones) have a childcare centre density of 6.483 per km^2 and below at their respective subzones. It seems like there are a few subzones at the Central Region of Singapore with relatively high childcare_density as compared to the rest of Singapore. There are 2 subzones with values that falls into the highest range, Mandai Estate has a childcare centre density of 27.945072 per km^2 while Phillip has 25.356297 per km^2. There are multiple subzones with 0 childcare centre density.
There are two planning subzones in Singapore, namely the Central Water Catchment and Western Water Catchment, that are expected to not have any childcare centres. However, based on the previous plots, there appears to be a childcare centre in the Western Water Catchment. This section will be to determine if the childcare centre is valid.
western_catchment_bounding_box <- st_bbox(mpszpop2017 %>% filter(SUBZONE_N == "WESTERN WATER CATCHMENT"))
western_catchment_osm <- read_osm(western_catchment_bounding_box, ext=1.1)
The postal code of the childcare centre in question is 637034.
investigate_childcare <- childcare_2017 %>%
filter(ADDRESSPOS == 637034)
tm_shape(western_catchment_osm)+
tm_rgb() +
tm_shape(mpsz)+
tm_polygons(alpha = 0, border.col = "black", border.alpha = 0.8, lwd = 2, lty="longdash") +
tm_shape(investigate_childcare)+
tm_dots(col="blue",
alpha=0.7,
size = 0.3,
border.col = "black",
border.lwd = 5)
The location of the childcare centre (blue dot on the map) when plotted on an openstreetmap of Singapore appears to be valid, especially when it is not on a green patch of land). Further research online reveals that the childcare centre is “Learning Vision @ NTU (Nanyang Technological University)”, which is located at a building within the compounds of a local university. As such, for the rest of the analysis, this childcare centre will be treated as a valid centre and will not be removed from the dataset, regardless of whether it is 2017 or 2020.
Based on the report on the Statistics on ECDC services by ECDA at ECDA Website that is accurate as of 16 September 2020, the average number of places/intake a childcare centre offers/supplies is about 107 spaces*.
| As at end of | 2016 | 2017 | 2018 | 2019 | 2020 (1st Half) |
|---|---|---|---|---|---|
| Total no. of centres: | 1,324 | 1,380 | 1,479 | 1,532 | 1,538 |
| Total no. of places: | 128,207 | 139,359 | 156,950 | 166,235 | 169,785 |
With the average number of spaces that each childcare centre can supply, we will be able to know if a planning subzone is over or under supplied.
For example, at Woodlands East, the population of 1 to 6 years old is 5808, and there are 36 childcare centres at that area, we can see that the demand for childcare services is at 5808 and the supply of childcare services is 3852 (36 centres * 107 places). This shows that there is more demand than supply at Woodlands East.
For this analysis, we will assume that the entire population of 1 to 6 year old requires childcare services.
average_number_of_places_per_childcare <- 107
supplyDemandCategoryFn <- function(childrenPop, childcareCount){
if(childrenPop > 0 & childcareCount > 0){
num_childcare_places_per_subzone <- childcareCount * average_number_of_places_per_childcare
if(childrenPop > num_childcare_places_per_subzone){
result<-"More Demand than Supply"
}
else if(childrenPop == num_childcare_places_per_subzone){
result<-"Supply Meets Demand"
}
else if(childrenPop < num_childcare_places_per_subzone){
result<-"More Supply than Demand"
}
}
else{
if(childrenPop > 0 & childcareCount == 0){
result <- "No Supply"
}
else if(childrenPop == 0 & childcareCount > 0){
result <- "No Demand"
}
else{
result <- "No Supply and Demand"
}
}
return(result)
}
percentDemandFn <- function(childrenPop, childcareCount){
if(childrenPop > 0 & childcareCount > 0){
num_childcare_places_per_subzone <- childcareCount * average_number_of_places_per_childcare
result <- round((childrenPop/num_childcare_places_per_subzone)*100,2)
}
else{
result <- round(0,2)
}
return(result)
}
mpszpop2017 <- mpszpop2017 %>%
rowwise() %>%
mutate(Supply_Demand_Categories = supplyDemandCategoryFn(Age_1_to_6_pop, childcare_count)) %>%
mutate(percent_demand = percentDemandFn(Age_1_to_6_pop, childcare_count)) %>%
ungroup()
mpszpop2017 <- st_as_sf(mpszpop2017)
sd_cat_freq_2017 <- mpszpop2017 %>%
group_by(Supply_Demand_Categories) %>%
summarize(sd_cat_count = n())
ggplot(sd_cat_freq_2017, aes(x=Supply_Demand_Categories, y=sd_cat_count)) +
geom_bar(position = 'dodge', stat = "identity", width=0.2) +
geom_text(aes(label=sd_cat_count), position=position_dodge(width=0.9), vjust=-0.25) +
theme_classic() +
labs(y= "Number of Each Supply and Demand Category", x = "Supply and Demand Categories") +
theme(axis.text.x = element_text(angle=65, vjust=0.6))
(supply_demand_categories_2017_plot <- tm_shape(mpszpop2017) +
tm_polygons(col = "Supply_Demand_Categories", palette="Accent") +
tm_layout("2017"))
To put into perspective of how much more or less childcare services there should be at each subzone, we shall look at the percent of the demand of childcare services vs the supply of services offered. Percent_demand is measured by taking the population of 1 to 6 year olds in a subzone divided by the total number of places/intakes of childcare centres in the same subzone, multiplied by 100.
summary(mpszpop2017$percent_demand)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 63.55 109.03 171.41 770.09
no_or_more_supply_2017_plot <-
tm_shape(mpszpop2017) +
tm_polygons() +
tm_shape(mpszpop2017[mpszpop2017$percent_demand>0 & mpszpop2017$percent_demand<=100, ]) +
tm_polygons(col = "percent_demand", style="equal", palette="-Greens") +
tm_layout("2017: >0% and <=100%", legend.position = c("right", "bottom"))
more_demand_2017_plot <-
tm_shape(mpszpop2017) +
tm_polygons() +
tm_shape(mpszpop2017[mpszpop2017$percent_demand>100, ]) +
tm_polygons(col = "percent_demand", style="equal", palette="Blues") +
tm_layout("2017: >100%", legend.position = c("right", "bottom"))
tmap_arrange(no_or_more_supply_2017_plot, more_demand_2017_plot, ncol=2)
Overall, there are more subzones that have “more demand than supply” as compared to “more supply than demand” and the different categories appear to be randomly distributed across Singapore. Based on the plots above for 2017, out of the 323 planning subzones, 141 subzones do not have enough childcare centres to provide to its demand of the 1 to 6 year olds (More demand than supply). Only 57 of the subzones have more supply of childcare services than demand (More supply than demand). There are no subzones where supply meets the demand (Supply meets demand). 76 subzones have neither 1 to 6 year old population nor childcare centres (No supply and demand). 28 subzones have no childcare centres to cater to its population of 1 to 6 year old (No supply), while 21 subzones have childcare centres but no population of 1 to 6 year old to cater to in its own subzone (No demand).
Ignoring the fact that the population of 1 to 6 year olds are able to travel to neighbouring subzones for childcare services, the analysis will focus on the supply and demand within each subzone (at the subzone level). Subzones with no supply and demand, no supply, no demand are assigned 0% of percent_demand as it can only be measured if there are both values. Percent_demand is measured by taking the population of 1 to 6 year olds in a subzone divided by the total number of places/intakes of childcare centres in the same subzone, multiplied by 100.
Looking at the plot with more than 0% and equal or less than 100% of percent_demand, there are 12 subzones out of 57 subzones with 20.68% or less of childcare centre services being utilized (lower range). There are 10 subzones that falls into the 79.94% to 99.69% of percent_demand (upper range). Kim Keat has the highest percent_demand at 99.69%. At subzones where there are more supply than demand, its services seems to be mostly under utilized.
Looking at the plot with more than 100% of percent_demand, we can see varying amounts of demand for services. The highest amount of percent_demand is at 770.09%, which is Redhill, which means that there are about 7.7 times more demand than the childcare services being supplied at the subzone. 1 childcare centre in Redhill is insufficient to cater to a population of 824.
tm_shape(mpszpop2020)+
tm_fill(col="Age_1_to_6_pop", style="equal", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
The plot above shows the population of 1 to 6 year olds at each planning subzone in Singapore for 2020. Similar to what was explained in Part 2.1.1, we shall look at the population density instead.
mpszpop2020 <- mpszpop2020 %>%
mutate(pop_density = (Age_1_to_6_pop/SHAPE_Area)*1000000)
ggplot(mpszpop2020, aes(pop_density)) +
geom_histogram() +
theme_classic() +
labs(y= "Frequency", x = "Population density per km^2 of 1 to 6 year olds in subzones")
ggplot(mpszpop2020, aes(y = Age_1_to_6_pop, x= pop_density)) +
geom_point() +
theme_classic() +
labs(y= "Population of 1 to 6 year olds in subzones", x = "Population density per km^2 of 1 to 6 year olds in subzones")
summary(mpszpop2020$pop_density)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 304.1 584.5 864.7 4537.9
tm_shape(mpszpop2020)+
tm_fill(col="pop_density", style="equal", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
Based on the plots above, out of the 323 planning subzones, majority (75% of planning subzones) have a population density that is 864.7 per km^2 and below. There appears to be clusters of subzones with high population density in the North-East Region of Singapore. Similar to what was observed in 2017, there are 2 subzones with values that are in the highest range, Matilda has a population density of 4537.8774 per km^2 while Waterway East has 4318.9615 per km^2. Both of the subzones are under the Punggol Planning Area and in the North-East Region. There are multiple subzones with 0 population density.
mpszpop2020$`childcare_count` <- lengths(st_intersects(mpszpop2020, childcare_2020))
Childcare centres in 2020:
tm_shape(mpszpop2020) +
tm_polygons(col="white") +
tm_shape(childcare_2020) +
tm_dots(col="black", size=0.1, alpha=0.3) +
tmap_style("white")
There appears to be clusters of childcare centres at multiple subzones, especially those at the North-East, North, and West Regions. There are also many subzones with very little or no childcare centres.
tm_shape(mpszpop2020)+
tm_fill(col="childcare_count", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
The plot above shows the number of childcare centres at each planning subzone in Singapore for 2020. We shall look at the childcare centre density instead, which is the number of childcare centres per km^2 in each subzone.
mpszpop2020 <- mpszpop2020 %>%
mutate(childcare_density = (childcare_count/SHAPE_Area)*1000000)
ggplot(mpszpop2020, aes(childcare_density)) +
geom_histogram() +
theme_classic() +
labs(y= "Frequency", x = "Childcare centres density per km^2 in subzones")
ggplot(mpszpop2020, aes(y = childcare_count, x= childcare_density)) +
geom_point() +
theme_classic() +
labs(y= "Number of childcare centres in subzones", x = "Childcare centres density per km^2 in subzones")
summary(mpszpop2020$childcare_density)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 3.284 4.574 7.775 35.602
tm_shape(mpszpop2020)+
tm_fill(col="childcare_density", style="equal", palette = "Blues") +
tm_layout(legend.position = c("right", "bottom")) +
tm_borders(alpha = 0.5) +
tmap_style("white")
Based on the plots above, out of the 323 planning subzones, majority (75% of planning subzones) have a childcare centre density of 7.775 per km^2 and below at their respective subzones. It seems like there are a few subzones at the Central Region of Singapore with relatively high childcare_density as compared to the rest of Singapore. There are 2 subzones with values that falls into the highest range, Cecil has a childcare centre density of 35.601693 per km^2 while Mandai Estate has 34.931339 per km^2. There are multiple subzones with 0 childcare centre density.
Following the explanation in Part 2.1.4, the average number of places/intake of each childcare centre is approximately 107 places.
mpszpop2020 <- mpszpop2020 %>%
rowwise() %>%
mutate(Supply_Demand_Categories = supplyDemandCategoryFn(Age_1_to_6_pop, childcare_count)) %>%
mutate(percent_demand = percentDemandFn(Age_1_to_6_pop, childcare_count)) %>%
ungroup()
mpszpop2020 <- st_as_sf(mpszpop2020)
sd_cat_freq_2020 <- mpszpop2020 %>%
group_by(Supply_Demand_Categories) %>%
summarize(sd_cat_count = n())
ggplot(sd_cat_freq_2020, aes(x=Supply_Demand_Categories, y=sd_cat_count)) +
geom_bar(position = 'dodge', stat = "identity", width=0.2) +
geom_text(aes(label=sd_cat_count), position=position_dodge(width=0.9), vjust=-0.25) +
theme_classic() +
labs(y= "Number of Each Supply and Demand Category", x = "Supply and Demand Categories") +
theme(axis.text.x = element_text(angle=65, vjust=0.6))
(supply_demand_categories_2020_plot <- tm_shape(mpszpop2020) +
tm_polygons(col = "Supply_Demand_Categories", palette="Accent") +
tm_layout("2020"))
To put into perspective of how much more or less childcare services there should be at each subzone, we shall look at the percent of the demand of childcare services vs the supply of services offered. Percent_demand is measured by taking the population of 1 to 6 year olds in a subzone divided by the total number of places/intakes of childcare centres in the same subzone, multiplied by 100.
summary(mpszpop2020$percent_demand)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 71.03 90.77 146.96 654.21
no_or_more_supply_2020_plot <-
tm_shape(mpszpop2020) +
tm_polygons() +
tm_shape(mpszpop2020[mpszpop2020$percent_demand>0 & mpszpop2020$percent_demand<=100, ]) +
tm_polygons(col = "percent_demand", style="equal", palette="-Greens") +
tm_layout("2020: >0% and <=100%", legend.position = c("right", "bottom"))
more_demand_2020_plot <-
tm_shape(mpszpop2017) +
tm_polygons() +
tm_shape(mpszpop2020[mpszpop2020$percent_demand>100, ]) +
tm_polygons(col = "percent_demand", style="equal", palette="Blues") +
tm_layout("2020: >100%", legend.position = c("right", "bottom"))
tmap_arrange(no_or_more_supply_2020_plot, more_demand_2020_plot, ncol=2)
Overall, there are more subzones that have “more demand than supply” as compared to “more supply than demand” and the different categories appear to be randomly distributed across Singapore. Based on the plots above for 2020, out of the 323 planning subzones, 137 subzones do not have enough childcare centres to provide to its demand of the 1 to 6 year olds (More demand than supply). Only 66 of the subzones have more supply of childcare services than demand (More supply than demand). 1 of the subzones has childcare services that meets the supply and demand, which is at Lavender (Supply Meets Demand). 78 subzones have neither 1 to 6 year old population nor childcare centres (No supply and demand). 18 subzones have no childcare centres (No supply) to cater to its population of 1 to 6 year old, while 23 subzones have childcare centres but no population of 1 to 6 year old to cater to in its own subzone (No demand).
Ignoring the fact that the population of 1 to 6 year olds are able to travel to neighbouring subzones for childcare services, the analysis will focus on the supply and demand within each subzone (at the subzone level). Subzones with no supply and demand, no supply, no demand are assigned 0% of percent_demand as it can only be measured if there are both values. Percent_demand is measured by taking the population of 1 to 6 year olds in a subzone divided by the total number of places/intakes of childcare centres in the same subzone, multiplied by 100.
Looking at the plot with more than 0% and equal or less than 100% of percent_demand, there are 15 subzones out of 66 subzones with 20.56% or less of childcare centre services being utilized (lower range). There are 19 subzones that falls into the 80.2% to 100% of percent_demand (upper range). Lavender has the highest percent_demand at 100%. At subzones where there are more supply than demand, its services still seems to be mostly under utilized.
Looking at the plot with more than 100% of percent_demand, we can see varying amounts of demand for services. The highest amount of percent_demand is at 654.21%, which is Redhill, which means that there are about 6.5 times more demand than the childcare services being supplied at the subzone. 1 childcare centre in Redhill is insufficient to cater to a population of 700.
| Year | 2017 | 2020 | Difference |
|---|---|---|---|
| Total population of 1 to 6 year olds: | 230852 | 227796 | -3056 |
| Total number of childcare centres: | 1312 | 1545 | +233 |
| Highest population subzone: | Tampines East | Tampines East | - |
| Subzone highest population count: | 6928 | 6752 | -176 |
| Highest poplation density subzone: | Matilda | Matilda | - |
| Subzone highest population density (per km^2): | 4233.28 | 4538.88 | +305.6 |
| Highest number of childcare centres subzone: | Woodlands East | Tampines East and Woodlands East | - |
| Subzone highest number of childcare centres: | 36 | 42 | +6 |
| Highest childcare centres density subzone: | Mandai Estate | Cecil | - |
| Subzone highest childcare centres density (per km^2): | 27.95 | 35.60 | +7.65 |
| Highest percent_demand subzone: | Redhill | Redhill | - |
| Subzone highest percent_demand: | 770.09% | 654.21% | -115.88% |
| Total number of subzones: | 323 | 323 | - |
~Number of subzones where there are: |
|||
| More Demand than Supply | 141 | 137 | -4 |
| More Supply than Demand | 57 | 66 | +9 |
| Supply Meets Demand | 0 | 1 | +1 |
| No Supply and Demand | 76 | 78 | +2 |
| No Supply | 28 | 18 | -10 |
| No Demand | 21 | 23 | +2 |
tmap_arrange(supply_demand_categories_2017_plot, supply_demand_categories_2020_plot, ncol=2)
tmap_arrange(no_or_more_supply_2017_plot, no_or_more_supply_2020_plot, ncol=2)
tmap_arrange(more_demand_2017_plot, more_demand_2020_plot, ncol=2)
Comparing 2017 and 2020, the total population of 1 to 6 year olds fell by 3056 to 227796 in 2020. 2020 also saw an increase of 233 childcare centres as compared to 2017. Does an increase in the total number of childcare centres improve the overall supply and demand of childcare services at the subzone level?
Generally for both 2017 and 2020, there are more subzones that have “more demand than supply” as compared to “more supply than demand” categories.
Although it is not obvious on the map that compares the supply and demand categories of 2017 and 2020, there appears to be a slight improvement as there is a decrease of 4 subzones with “more demand than supply”. This means that there are 4 more subzones have sufficient or more childcare services to cater to the population of 1 to 6 year olds in 2020 as comapred to 2017.
10 subzones that had no childcare centres (no supply) within the subzone in 2017, have at least 1 childcare centre in 2020.
There appears to be an increase in the “more supply than demand” category, 9 more subzones now have childcare centres that provides childcare services that can accomodate more than what the population in the subzone require/demand. This brings the total to 66 subzones in 2020, which might be a good thing for the residents as the population within the subzone can have more options when selecting childcare centres (if there are more than 1 childcare centre) rather than to fight for a spot at the childcare centre. However, from the childcare centres’ point of view, this means that its services are not fully utilized.
In 2020, 1 subzone managed to exactly meet the supply and demand.
Comparing the percent_demand plots, Redhill which has 7.7 times more demand than the childcare services being supplied at the subzone in 2017 (770.09% percent_demand), saw an improvement in 2020 as the percent_demand dropped to 6.5 times more demand. Although the demand is still high, it appears that having more childcare centres built will help reduce the over demand.
The analysis of will be focused on 4 Planning Areas Sengkang, Bedok, Bukit Batok and Hougang.
Apart from looking at each planning areas as a whole, each categories will be further split into 2 categories: Government-Linked childcare centres in each Planning Area, and Private childcare centres in each Planning Area. Childcare centres with “Unknown” organisation types will not be analysed (as it might contain Government-linked or Private childcare centres. Refer to Part 1.3.4 for possible reasons of “Unknown” organisation type).
From sf data.frame to SpatialPointsDataFrame
spdf_childcare_2017 <- as_Spatial(childcare_2017)
spdf_childcare_2017_gov<- as_Spatial(childcare_2017[childcare_2017$organisation_type == "Government-linked", ])
spdf_childcare_2017_pte <- as_Spatial(childcare_2017[childcare_2017$organisation_type == "Private", ])
spdf_childcare_2020 <- as_Spatial(childcare_2020)
spdf_childcare_2020_gov<- as_Spatial(childcare_2020[childcare_2020$organisation_type == "Government-linked", ])
spdf_childcare_2020_pte <- as_Spatial(childcare_2020[childcare_2020$organisation_type == "Private", ])
From SpatialPointsDataFrame to generic sp format
# from SpatialPointsDataFrame to SpatialPoints
sp_childcare_2017 <- as(spdf_childcare_2017, "SpatialPoints")
sp_childcare_2017_gov <- as(spdf_childcare_2017_gov, "SpatialPoints")
sp_childcare_2017_pte <- as(spdf_childcare_2017_pte, "SpatialPoints")
sp_childcare_2020 <- as(spdf_childcare_2020, "SpatialPoints")
sp_childcare_2020_gov <- as(spdf_childcare_2017_gov, "SpatialPoints")
sp_childcare_2020_pte <- as(spdf_childcare_2017_pte, "SpatialPoints")
From SpatialPoints to spatstat’s ppp format
ppp_childcare_2017 <- as(sp_childcare_2017, "ppp")
ppp_childcare_2017_gov <- as(sp_childcare_2017_gov, "ppp")
ppp_childcare_2017_pte <- as(sp_childcare_2017_pte, "ppp")
ppp_childcare_2020 <- as(sp_childcare_2020, "ppp")
ppp_childcare_2020_gov <- as(sp_childcare_2020_gov, "ppp")
ppp_childcare_2020_pte <- as(sp_childcare_2020_pte, "ppp")
ppp_childcare_2017 dataset:
any(duplicated(ppp_childcare_2017))
## [1] TRUE
sum(multiplicity(ppp_childcare_2017) > 1)
## [1] 85
85 duplicates, handle the duplicates using the jittering approach.
ppp_childcare_2017_jit <- rjitter(ppp_childcare_2017, retry=TRUE, nsim=1, drop=TRUE)
Check if duplicates are handled.
any(duplicated(ppp_childcare_2017_jit))
## [1] FALSE
sum(multiplicity(ppp_childcare_2017_jit) > 1)
## [1] 0
There are no more duplicated points for the ppp_childcare_2017_jit dataset.
ppp_childcare_2017_gov dataset:
any(duplicated(ppp_childcare_2017_gov))
## [1] FALSE
sum(multiplicity(ppp_childcare_2017_gov) > 1)
## [1] 0
There are no duplicated points for the ppp_childcare_2017_gov dataset.
ppp_childcare_2017_pte dataset:
any(duplicated(ppp_childcare_2017_pte))
## [1] TRUE
sum(multiplicity(ppp_childcare_2017_pte) > 1)
## [1] 49
49 duplicates, handle the duplicates using the jittering approach.
ppp_childcare_2017_pte_jit <- rjitter(ppp_childcare_2017_pte, retry=TRUE, nsim=1, drop=TRUE)
Check if duplicates are handled.
any(duplicated(ppp_childcare_2017_pte_jit))
## [1] FALSE
sum(multiplicity(ppp_childcare_2017_pte_jit) > 1)
## [1] 0
There are no more duplicated points for the ppp_childcare_2017_pte_jit dataset.
ppp_childcare_2020 dataset:
any(duplicated(ppp_childcare_2020))
## [1] TRUE
sum(multiplicity(ppp_childcare_2020) > 1)
## [1] 128
128 duplicates, handle the duplicates using the jittering approach.
ppp_childcare_2020_jit <- rjitter(ppp_childcare_2020, retry=TRUE, nsim=1, drop=TRUE)
Check if duplicates are handled.
any(duplicated(ppp_childcare_2020_jit))
## [1] FALSE
sum(multiplicity(ppp_childcare_2020_jit) > 1)
## [1] 0
There are no more duplicated points for the ppp_childcare_2020_jit dataset.
ppp_childcare_2020_gov dataset:
any(duplicated(ppp_childcare_2020_gov))
## [1] FALSE
sum(multiplicity(ppp_childcare_2020_gov) > 1)
## [1] 0
There are no duplicated points for the ppp_childcare_2020_gov dataset.
ppp_childcare_2020_pte dataset:
any(duplicated(ppp_childcare_2020_pte))
## [1] TRUE
sum(multiplicity(ppp_childcare_2020_pte) > 1)
## [1] 49
49 duplicates, handle the duplicates using the jittering approach.
ppp_childcare_2020_pte_jit <- rjitter(ppp_childcare_2020_pte, retry=TRUE, nsim=1, drop=TRUE)
Check if duplicates are handled.
any(duplicated(ppp_childcare_2020_pte_jit))
## [1] FALSE
sum(multiplicity(ppp_childcare_2020_pte_jit) > 1)
## [1] 0
There are no more duplicated points for the ppp_childcare_2020_pte_jit dataset.
To perform spatial point patterns analysis, we shall remove subzones within the 4 planning areas with 0 population from mpszpop2017 and mpszpop2020 as these are viewed as having no potential demand for childcare services.
Filter dataset and convert to sp data.frame
spdf_mpszpop2017 <- as_Spatial(mpszpop2017[mpszpop2017$Age_1_to_6_pop > 0, ])
spdf_mpszpop2020 <- as_Spatial(mpszpop2020[mpszpop2020$Age_1_to_6_pop > 0, ])
Extract the 4 planning areas for both years (Although both are from Master Plan 2014, the filtering of population might have a difference between both years)
sk_2017 <- spdf_mpszpop2017[spdf_mpszpop2017@data$PLN_AREA_N == "SENGKANG",]
bd_2017 <- spdf_mpszpop2017[spdf_mpszpop2017@data$PLN_AREA_N == "BEDOK",]
bb_2017 <- spdf_mpszpop2017[spdf_mpszpop2017@data$PLN_AREA_N == "BUKIT BATOK",]
hg_2017 <- spdf_mpszpop2017[spdf_mpszpop2017@data$PLN_AREA_N == "HOUGANG",]
sk_2020 <- spdf_mpszpop2020[spdf_mpszpop2020@data$PLN_AREA_N == "SENGKANG",]
bd_2020 <- spdf_mpszpop2020[spdf_mpszpop2020@data$PLN_AREA_N == "BEDOK",]
bb_2020 <- spdf_mpszpop2020[spdf_mpszpop2020@data$PLN_AREA_N == "BUKIT BATOK",]
hg_2020 <- spdf_mpszpop2020[spdf_mpszpop2020@data$PLN_AREA_N == "HOUGANG",]
Convert sp data.frame to generic sp format
sp_sk_2017 <- as(sk_2017, "SpatialPolygons")
sp_bd_2017 <- as(bd_2017, "SpatialPolygons")
sp_bb_2017 <- as(bb_2017, "SpatialPolygons")
sp_hg_2017 <- as(hg_2017, "SpatialPolygons")
sp_sk_2020 <- as(sk_2020, "SpatialPolygons")
sp_bd_2020 <- as(bd_2020, "SpatialPolygons")
sp_bb_2020 <- as(bb_2020, "SpatialPolygons")
sp_hg_2020 <- as(hg_2020, "SpatialPolygons")
Convert generic sp format to owin object
owin_sk_2017 <- as(sp_sk_2017, "owin")
owin_bd_2017 <- as(sp_bd_2017, "owin")
owin_bb_2017 <- as(sp_bb_2017, "owin")
owin_hg_2017 <- as(sp_hg_2017, "owin")
owin_sk_2020 <- as(sp_sk_2020, "owin")
owin_bd_2020 <- as(sp_bd_2020, "owin")
owin_bb_2020 <- as(sp_bb_2020, "owin")
owin_hg_2020 <- as(sp_hg_2020, "owin")
(Note: for gov childcare ppp of 2017 and 2020, there are no duplicated points so no jittering is needed).
childcare_sk_2017_ppp = ppp_childcare_2017_jit[owin_sk_2017]
childcare_sk_2017_ppp_gov = ppp_childcare_2017_gov[owin_sk_2017]
childcare_sk_2017_ppp_pte = ppp_childcare_2017_pte_jit[owin_sk_2017]
childcare_bd_2017_ppp = ppp_childcare_2017_jit[owin_bd_2017]
# childcare_bd_2017_ppp_gov = ppp_childcare_2017_gov[owin_bd_2017]
# childcare_bd_2017_ppp_pte = ppp_childcare_2017_pte_jit[owin_bd_2017]
childcare_bb_2017_ppp = ppp_childcare_2017_jit[owin_bb_2017]
# childcare_bb_2017_ppp_gov = ppp_childcare_2017_gov[owin_bb_2017]
# childcare_bb_2017_ppp_pte = ppp_childcare_2017_pte_jit[owin_bb_2017]
childcare_hg_2017_ppp = ppp_childcare_2017_jit[owin_hg_2017]
# childcare_hg_2017_ppp_gov = ppp_childcare_2017_gov[owin_hg_2017]
# childcare_hg_2017_ppp_pte = ppp_childcare_2017_pte_jit[owin_hg_2017]
childcare_sk_2020_ppp = ppp_childcare_2020_jit[owin_sk_2020]
# childcare_sk_2020_ppp_gov = ppp_childcare_2020_gov[owin_sk_2020]
# childcare_sk_2020_ppp_pte = ppp_childcare_2020_pte_jit[owin_sk_2020]
childcare_bd_2020_ppp = ppp_childcare_2020_jit[owin_bd_2020]
# childcare_bd_2020_ppp_gov = ppp_childcare_2020_gov[owin_bd_2020]
# childcare_bd_2020_ppp_pte = ppp_childcare_2020_pte_jit[owin_bd_2020]
childcare_bb_2020_ppp = ppp_childcare_2020_jit[owin_bb_2020]
# childcare_bb_2020_ppp_gov = ppp_childcare_2020_gov[owin_bb_2020]
# childcare_bb_2020_ppp_pte = ppp_childcare_2020_pte_jit[owin_bb_2020]
childcare_hg_2020_ppp = ppp_childcare_2020_jit[owin_hg_2020]
# childcare_hg_2020_ppp_gov = ppp_childcare_2020_gov[owin_hg_2020]
# childcare_hg_2020_ppp_pte = ppp_childcare_2020_pte_jit[owin_hg_2020]
Note: Due to the limited computing resource and file size requirement to upload to RPubs: - Only Sengkang 2017 will contain plots for Government-linked childcare centres in Sengkang 2017 (Part 4.5.1.1) and Private childcare centres in Sengkang 2017 (Part 4.5.1.2) to compare if there are differences between the organisation types. Others will only have the Planning Area as a whole for 2017 and 2020.
plot(childcare_sk_2017_ppp)
For Sengkang 2017, although there are some areas where the childcare centres appear to have a random distribution, the overall of childcare centres in Sengkang seems to display signs of clustering.
plot(childcare_sk_2017_ppp_gov)
For Sengkang Government-linked childcare centres in 2017, the overall it seems to display signs of random distribution.
plot(childcare_sk_2017_ppp_pte)
For Sengkang Private childcare centres in 2017, some parts display signs of random distribution but the overall seems to display signs of clustering especially at the right region of Sengkang.
plot(childcare_bd_2017_ppp)
For Bedok 2017, the overall spatial distibution of childcare centres seems to display signs of clustering.
plot(childcare_bb_2017_ppp)
For Bukit Batok 2017, the overall spatial distibution of childcare centres seems to display signs of clustering, especially at the South-West Region.
plot(childcare_hg_2017_ppp)
For Hougang 2017, the overall spatial distibution of childcare centres seems to display signs of clustering, except at the Southern portion.
plot(childcare_sk_2020_ppp)
For Sengkang 2020, the overall spatial distibution of childcare centres seems to display signs of clustering, especially at the East Region.
plot(childcare_bd_2020_ppp)
For Bedok 2020, the overall spatial distibution of childcare centres seems to display signs of clustering.
plot(childcare_bb_2020_ppp)
For Bukit Batok 2020, the overall spatial distibution of childcare centres seems to display signs of clustering except the Northern and Southern portion.
plot(childcare_hg_2020_ppp)
For Hougang 2020, the overall spatial distibution of childcare centres seems to display signs of clustering.
For 2nd order spatial point patterns analysis, there are 4 analysis techniques. The G and F functions are based on the number of observations within a certain distance count, using the nearest neighbour distance method which has a limitation; it only consider the shortest scales of variation. On the other hand, the K and L functions uses a moving window method so it will give us better indications. As L is the normalised/standardised version of the K function, the L function is the preferred technique to use.
For all plots for the L function csr test, the theoretical values are represented by the red dotted line, the observed L values are represented by the black line, and the confidence envelope (95%) is represented by the grey area.
plotLFunctionCSRFn <- function(inputTitle, input_csr){
input_csr_df <- as.data.frame(input_csr)
colour=c("#0D657D","#ee770d","#D3D3D3")
Lplot <- ggplot(input_csr_df, aes(r, obs-r)) +
# plot observed value
geom_line(colour=c("#4d4d4d"))+
geom_line(aes(r,theo-r), colour="red", linetype = "dashed")+
# plot simulation envelopes
geom_ribbon(aes(ymin=lo-r,ymax=hi-r),alpha=0.1, colour=c("#91bfdb")) +
# SVY 21 unit is in m
xlab("Distance d (m)") +
ylab("L(r)-r") +
geom_rug(data=input_csr_df[input_csr_df$obs > input_csr_df$hi,], sides="b", colour=colour[1]) +
geom_rug(data=input_csr_df[input_csr_df$obs < input_csr_df$lo,], sides="b", colour=colour[2]) +
geom_rug(data=input_csr_df[input_csr_df$obs >= input_csr_df$lo & input_csr_df$obs <= input_csr_df$hi,], sides="b", color=colour[3]) +
theme_tufte() +
ggtitle(inputTitle)
text1<-"Significant clustering"
text2<-"Significant dispersion"
text3<-"No significant clustering/dispersion"
# conditional statement is required to ensure that the labels (text1/2/3) are assigned to the correct traces
if (nrow(input_csr_df[input_csr_df$obs > input_csr_df$hi,])==0){
if (nrow(input_csr_df[input_csr_df$obs < input_csr_df$lo,])==0){
ggplotly(Lplot, dynamicTicks=T) %>%
style(text = text3, traces = 4) %>%
rangeslider()
}else if (nrow(input_csr_df[input_csr_df$obs >= input_csr_df$lo & input_csr_df$obs <= input_csr_df$hi,])==0){
ggplotly(Lplot, dynamicTicks=T) %>%
style(text = text2, traces = 4) %>%
rangeslider()
}else {
ggplotly(Lplot, dynamicTicks=T) %>%
style(text = text2, traces = 4) %>%
style(text = text3, traces = 5) %>%
rangeslider()
}
} else if (nrow(input_csr_df[input_csr_df$obs < input_csr_df$lo,])==0){
if (nrow(input_csr_df[input_csr_df$obs >= input_csr_df$lo & input_csr_df$obs <= input_csr_df$hi,])==0){
ggplotly(Lplot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
rangeslider()
} else{
ggplotly(Lplot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
style(text = text3, traces = 5) %>%
rangeslider()
}
} else{
ggplotly(Lplot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
style(text = text2, traces = 5) %>%
style(text = text3, traces = 6) %>%
rangeslider()
}
}
Note: Due to the limited computing resource and file size requirement to upload to RPubs: - Only Sengkang 2017 (Part 5.2.1) will contain an interactive plot for the CSR test, others will have a static plot. - Only Sengkang 2017 will contain plots for Government-linked childcare centres in Sengkang 2017 (Part 5.2.1.1) and Private childcare centres in Sengkang 2017 (Part 5.2.1.2) to compare if there are differences between the organisation types. Others will only have the Planning Area as a whole for 2017 and 2020.
L_sk_2017 = Lest(childcare_sk_2017_ppp, correction = "Ripley")
plot(L_sk_2017, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_sk_2017.csr <- envelope(childcare_sk_2017_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
Interactive Plot:
plotLFunctionCSRFn("L function - Sengkang 2017", L_sk_2017.csr)
The point pattern of childcare centres at Sengkang in 2017 largely lies above the envelope and resembles clustering after a certain distance, hence spatial clustering is statistically significant and we reject the Ho.
L_sk_2017_gov = Lest(childcare_sk_2017_ppp_gov, correction = "Ripley")
plot(L_sk_2017_gov, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_sk_2017_gov.csr <- envelope(childcare_sk_2017_ppp_gov, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Sengkang 2017 Gov", L_sk_2017_gov.csr)
plot(L_sk_2017_gov.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of Government-linked childcare centres at Sengkang in 2017 largely lies within the envelope, which means it is not statistically significant and we fail to reject the Ho.
L_sk_2017_pte = Lest(childcare_sk_2017_ppp_pte, correction = "Ripley")
plot(L_sk_2017_pte, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_sk_2017_pte.csr <- envelope(childcare_sk_2017_ppp_pte, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Sengkang 2017 Pte", L_sk_2017_pte.csr)
plot(L_sk_2017_pte.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of Private childcare centres at Sengkang in 2017 largely lies above the envelope and resembles clustering after a certain distance, hence spatial clustering is statistically significant and we reject the Ho.
L_bd_2017 = Lest(childcare_bd_2017_ppp, correction = "Ripley")
plot(L_bd_2017, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_bd_2017.csr <- envelope(childcare_bd_2017_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Bedok 2017", L_bd_2017.csr)
plot(L_bd_2017.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of childcare centres at Bedok in 2017 largely lies within the envelope, which means it is not statistically significant and we fail to reject the Ho.
L_bb_2017 = Lest(childcare_bb_2017_ppp, correction = "Ripley")
plot(L_bb_2017, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_bb_2017.csr <- envelope(childcare_bb_2017_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Bukit Batok 2017", L_bb_2017.csr)
plot(L_bb_2017.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of childcare centres at Bukit Batok in 2017 largely lies above the envelope and resembles clustering after a certain distance, hence spatial clustering is statistically significant and we reject the Ho.
L_hg_2017 = Lest(childcare_hg_2017_ppp, correction = "Ripley")
plot(L_hg_2017, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_hg_2017.csr <- envelope(childcare_hg_2017_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Hougang 2017", L_hg_2017.csr)
plot(L_hg_2017.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of childcare centres at Hougang in 2017 shows some signs of clustering at certain distances but it largely lies within the envelope, which means it is not statistically significant and we fail to reject the Ho.
L_sk_2020 = Lest(childcare_sk_2020_ppp, correction = "Ripley")
plot(L_sk_2020, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_sk_2020.csr <- envelope(childcare_sk_2020_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Sengkang 2020", L_sk_2020.csr)
plot(L_sk_2020.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of childcare centres at Sengkang in 2020 largely lies above the envelope and resembles clustering after a certain distance, hence spatial clustering is statistically significant and we reject the Ho.
L_bd_2020 = Lest(childcare_bd_2020_ppp, correction = "Ripley")
plot(L_bd_2020, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_bd_2020.csr <- envelope(childcare_bd_2020_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Bedok 2020", L_bd_2020.csr)
plot(L_bd_2020.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of childcare centres at Bedok in 2020 largely lies above the envelope and resembles clustering after a certain distance, hence spatial clustering is statistically significant and we reject the Ho.
L_bb_2020 = Lest(childcare_bb_2020_ppp, correction = "Ripley")
plot(L_bb_2020, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_bb_2020.csr <- envelope(childcare_bb_2020_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Bukit Batok 2020", L_bb_2020.csr)
plot(L_bb_2020.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of childcare centres at Bukit Batok in 2020 lies above the envelope and resembles clustering after a certain distance, hence spatial clustering is statistically significant and we reject the Ho.
L_hg_2020 = Lest(childcare_hg_2020_ppp, correction = "Ripley")
plot(L_hg_2020, . -r ~ r, ylab= "L(d)-r", xlab = "d(m)")
set.seed(50)
L_hg_2020.csr <- envelope(childcare_hg_2020_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)
## Generating 39 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
# plotLFunctionCSRFn("L function - Hougang 2020", L_hg_2020.csr)
plot(L_hg_2020.csr, . - r ~ r, xlab="d(m)", ylab="L(d)-r")
The point pattern of childcare centres at Hougang in 2020 largely lies within the envelope, which means it is not statistically significant and we fail to reject the Ho.