This analysis aims to analyse and compare childcare services in 2017 and 2020. There will be two parts to this analysis: (1) Supply and Demand Analysis of Childcare Services, and (2) Spatial Point Pattern analysis of Childcare Services.
The following data sets will be used in the analysis.
| Data | Format | Description | Source |
|---|---|---|---|
| Childcare (2020) | KML | Location of childcare services in 2020 | data.gov.sg (https://data.gov.sg/dataset/child-care-services) |
| Childcare (2017) | Shapefile | Location of childcare services in 2017 | Hands-on Exercise Data |
| Subzone | Shapefile | Master Plan 2014 subzone boundary polygons | data.gov.sg (https://data.gov.sg/dataset/master-plan-2014-subzone-boundary-no-sea) |
| Population data | CSV | Singapore residents by planning area, subzone, age group, sex and type of dwelling (2011-2019) | Department of Statistics Singapore (https://www.singstat.gov.sg/find-data/search-by-theme/population/geographic-distribution/latest-data) |
| Crude birth rate | CSV | Number of live births in Singapore each year, per thousand mid-year population | data.gov.sg (https://data.gov.sg/dataset/births-and-fertility-annual?resource_id=2ba37efc-5411-4f1f-aecf-ea2455c9236d) |
packages = c('tidyverse', 'tmap', 'sf', 'rgdal', 'maptools', 'raster', 'spatstat', 'plotly', 'tmaptools')
for (p in packages) {
if (!require(p, character.only = T)) {
install.packages(p)
}
library(p, character.only = T)
}
Import the data and examine contents of each data set
childcare20_sf <- st_read('data/geospatial/child-care-services-kml.kml')
## Reading layer `CHILDCARE' from data source `C:\Users\Xiao Rong\Desktop\School\Geospatial Analytics and Applications\Assignments\Take-Home Exercise 1\IS415_Take-home_Ex01\data\geospatial\child-care-services-kml.kml' using driver `KML'
## Simple feature collection with 1545 features and 2 fields
## geometry type: POINT
## dimension: XYZ
## bbox: xmin: 103.6824 ymin: 1.248403 xmax: 103.9897 ymax: 1.462134
## z_range: zmin: 0 zmax: 0
## geographic CRS: WGS 84
glimpse(childcare20_sf)
## Rows: 1,545
## Columns: 3
## $ Name <chr> "kml_1", "kml_2", "kml_3", "kml_4", "kml_5", "kml_6", "...
## $ Description <chr> "<center><table><tr><th colspan='2' align='center'><em>...
## $ geometry <POINT [°]> POINT Z (103.8331 1.42972 0), POINT Z (103.8138 1...
childcare17_sf <- st_read(dsn = 'data/geospatial',
layer = 'CHILDCARE')
## Reading layer `CHILDCARE' from data source `C:\Users\Xiao Rong\Desktop\School\Geospatial Analytics and Applications\Assignments\Take-Home Exercise 1\IS415_Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 1312 features and 18 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 11203.01 ymin: 25667.6 xmax: 45404.24 ymax: 49300.88
## projected CRS: SVY21
glimpse(childcare17_sf)
## Rows: 1,312
## Columns: 19
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1...
## $ ADDRESSBLO <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ADDRESSBUI <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ADDRESSPOS <chr> "387908", "489773", "569880", "520114", "437157", "76092...
## $ ADDRESSSTR <chr> "11 LORONG 37 GEYLANG SINGAPORE 387908", "13 BEDOK RIA P...
## $ ADDRESSTYP <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ DESCRIPTIO <chr> "Child Care Services", "Child Care Services", "Child Car...
## $ HYPERLINK <chr> "http://www.childcarelink.gov.sg/ccls/chdcentpart/ChdCen...
## $ LANDXADDRE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ LANDYADDRE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ NAME <chr> "FIRST JUNIOR PRESCHOOL", "DISCOVERY KIDZ PRESCHOOL PTE....
## $ PHOTOURL <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ADDRESSFLO <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ INC_CRC <chr> "45DBE80EB321A9B5", "B80CD6C30B33E468", "53490A27EDC8D9B...
## $ FMEL_UPD_D <date> 2016-12-23, 2016-12-23, 2016-12-23, 2016-12-23, 2016-12...
## $ ADDRESSUNI <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ X_ADDR <dbl> 34246.67, 41122.34, 32682.01, 41034.07, 34806.74, 28423....
## $ Y_ADDR <dbl> 33141.02, 34355.95, 39989.33, 36221.83, 32997.58, 45471....
## $ geometry <POINT [m]> POINT (34246.67 33141.02), POINT (41122.34 34355.9...
subzone_sf <- st_read(dsn = 'data/geospatial',
layer = 'MP14_SUBZONE_NO_SEA_PL')
## Reading layer `MP14_SUBZONE_NO_SEA_PL' from data source `C:\Users\Xiao Rong\Desktop\School\Geospatial Analytics and Applications\Assignments\Take-Home Exercise 1\IS415_Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## projected CRS: SVY21
glimpse(subzone_sf)
## Rows: 323
## Columns: 16
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1...
## $ SUBZONE_NO <int> 2, 2, 3, 4, 5, 4, 10, 12, 4, 6, 1, 1, 3, 8, 3, 7, 9, 2, ...
## $ SUBZONE_N <chr> "PEOPLE'S PARK", "BUKIT MERAH", "CHINATOWN", "PHILLIP", ...
## $ SUBZONE_C <chr> "OTSZ02", "BMSZ02", "OTSZ03", "DTSZ04", "DTSZ05", "OTSZ0...
## $ CA_IND <chr> "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y", "Y", "...
## $ PLN_AREA_N <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN CORE", "DOW...
## $ PLN_AREA_C <chr> "OT", "BM", "OT", "DT", "DT", "OT", "BM", "DT", "BM", "D...
## $ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "C...
## $ REGION_C <chr> "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", "C...
## $ INC_CRC <chr> "B4120D23006C932A", "1C51019439A68700", "0FF1661344C84AE...
## $ FMEL_UPD_D <date> 2016-05-11, 2016-05-11, 2016-05-11, 2016-05-11, 2016-05...
## $ X_ADDR <dbl> 28831.78, 26360.80, 29153.97, 29706.72, 29968.62, 29509....
## $ Y_ADDR <dbl> 29419.65, 29384.14, 29158.04, 29744.91, 29572.76, 29646....
## $ SHAPE_Leng <dbl> 1822.1927, 3074.9632, 4297.5999, 871.5549, 1872.7522, 16...
## $ SHAPE_Area <dbl> 93140.44, 411722.82, 587222.68, 39437.94, 188767.49, 133...
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((29099.02 29..., MULTIPOLYGO...
population <- read_csv('data/aspatial/respopagesextod2011to2019.csv')
## Parsed with column specification:
## cols(
## PA = col_character(),
## SZ = col_character(),
## AG = col_character(),
## Sex = col_character(),
## TOD = col_character(),
## Pop = col_double(),
## Time = col_double()
## )
glimpse(population)
## Rows: 883,728
## Columns: 7
## $ PA <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang M...
## $ SZ <chr> "Ang Mo Kio Town Centre", "Ang Mo Kio Town Centre", "Ang Mo Ki...
## $ AG <chr> "0_to_4", "0_to_4", "0_to_4", "0_to_4", "0_to_4", "0_to_4", "0...
## $ Sex <chr> "Males", "Males", "Males", "Males", "Males", "Males", "Males",...
## $ TOD <chr> "HDB 1- and 2-Room Flats", "HDB 3-Room Flats", "HDB 4-Room Fla...
## $ Pop <dbl> 0, 10, 30, 50, 0, 0, 40, 0, 0, 10, 30, 60, 0, 0, 40, 0, 0, 10,...
## $ Time <dbl> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 20...
birthrate <- read_csv('data/aspatial/crude-birth-rate.csv')
## Parsed with column specification:
## cols(
## year = col_double(),
## level_1 = col_character(),
## value = col_double()
## )
glimpse(birthrate)
## Rows: 59
## Columns: 3
## $ year <dbl> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969,...
## $ level_1 <chr> "Crude Birth Rate", "Crude Birth Rate", "Crude Birth Rate",...
## $ value <dbl> 37.5, 35.2, 33.7, 33.2, 31.6, 29.5, 28.3, 25.6, 23.5, 21.8,...
Pre-process and prepare data for analysis.
Ensure that spatial data to be used for analysis has no invalid geometries.
length(which(st_is_valid(subzone_sf) == FALSE))
## [1] 9
length(which(st_is_valid(childcare20_sf) == FALSE))
## [1] 0
length(which(st_is_valid(childcare17_sf) == FALSE))
## [1] 0
Only the subzone data has invalid geometries. Handle the invalid geometries and make them valid.
subzone_sf <- st_make_valid(subzone_sf)
length(which(st_is_valid(subzone_sf) == FALSE))
## [1] 0
Invalid geometries have been handled, there are no more invalid geometries for the spatial datasets.
Check the population attribute data for missing values, as missing values can impact future calculations.
population[rowSums(is.na(population))!=0,]
## # A tibble: 0 x 7
## # ... with 7 variables: PA <chr>, SZ <chr>, AG <chr>, Sex <chr>, TOD <chr>,
## # Pop <dbl>, Time <dbl>
There are no missing values in the population data.
Prepare spatial data for use in spatial analysis. Define the coordinate reference system (CRS).
st_crs(childcare20_sf)
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
Transform projection of childcare20_sf from WGS84 to EPSG:3414.
childcare20_sf3414 <- st_transform(childcare20_sf, 3414)
st_crs(childcare20_sf3414)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
st_crs(childcare17_sf)
## Coordinate Reference System:
## User input: SVY21
## wkt:
## PROJCRS["SVY21",
## BASEGEOGCRS["SVY21[WGS84]",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6326]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
Assign EPSG:3414 as the projection to childcare17_sf.
childcare17_sf3414 <- st_set_crs(childcare17_sf, 3414)
st_crs(childcare17_sf3414)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
st_crs(subzone_sf)
## Coordinate Reference System:
## User input: SVY21
## wkt:
## PROJCRS["SVY21",
## BASEGEOGCRS["SVY21[WGS84]",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6326]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
Assign EPSG:3414 as the projection to subzone_sf.
subzone_sf3414 <- st_set_crs(subzone_sf, 3414)
st_crs(subzone_sf3414)
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
Plot spatial data to check that all data have a consistent projection system.
tm_shape(subzone_sf3414) +
tm_polygons(border.col='grey') +
tm_shape(childcare17_sf3414) +
tm_dots(col='brown4') +
tm_shape(childcare20_sf3414) +
tm_dots(col='lightsteelblue4') +
tm_layout(frame = FALSE)
The subzone and childcare points seem to be aligned, hence all spatial data is taken to be projected correctly.
Analysis will be performed on child care services data in 2017 and 2020. Hence, population data in 2017 and 2020 will be extracted separately from the population dataset.
population17 <- population %>%
filter(Time == 2017)
population20 <- population %>%
filter(Time == 2019)
Tidy the population dataset for each year, such that for each subzone, there will only be one row corresponding to population information.
Also, create another column calculating the total population for all age groups for each subzone.
population17_tidy <- population17 %>%
group_by(PA, SZ, AG) %>%
summarise(`POP` = sum(`Pop`)) %>%
ungroup() %>%
pivot_wider(names_from = AG, values_from = POP) %>%
mutate(TOTAL = rowSums(.[3:21]))
glimpse(population17_tidy)
## Rows: 323
## Columns: 22
## $ PA <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio...
## $ SZ <chr> "Ang Mo Kio Town Centre", "Cheng San", "Chong Boon", ...
## $ `0_to_4` <dbl> 190, 1120, 850, 680, 190, 590, 280, 960, 0, 170, 0, 9...
## $ `10_to_14` <dbl> 320, 1150, 1050, 990, 400, 740, 430, 950, 0, 200, 0, ...
## $ `15_to_19` <dbl> 280, 1280, 1320, 1120, 520, 830, 530, 870, 0, 260, 0,...
## $ `20_to_24` <dbl> 250, 1460, 1410, 1290, 550, 1010, 650, 1110, 0, 340, ...
## $ `25_to_29` <dbl> 320, 1850, 1750, 1490, 510, 1040, 700, 1460, 0, 330, ...
## $ `30_to_34` <dbl> 340, 1960, 1730, 1300, 290, 980, 400, 1430, 0, 220, 0...
## $ `35_to_39` <dbl> 380, 2340, 1950, 1510, 280, 1080, 440, 1830, 0, 220, ...
## $ `40_to_44` <dbl> 470, 2230, 1980, 1690, 480, 1230, 560, 1820, 0, 260, ...
## $ `45_to_49` <dbl> 450, 2180, 1880, 1740, 530, 1240, 580, 1530, 0, 280, ...
## $ `5_to_9` <dbl> 300, 1080, 920, 810, 320, 660, 340, 1030, 0, 160, 0, ...
## $ `50_to_54` <dbl> 350, 2250, 2180, 1870, 550, 1440, 640, 1580, 0, 310, ...
## $ `55_to_59` <dbl> 310, 2140, 2150, 1820, 550, 1430, 730, 1750, 0, 330, ...
## $ `60_to_64` <dbl> 290, 2240, 2170, 1780, 460, 1350, 640, 1710, 0, 330, ...
## $ `65_to_69` <dbl> 280, 2060, 2010, 1670, 420, 1140, 470, 1590, 0, 260, ...
## $ `70_to_74` <dbl> 170, 1210, 1450, 1120, 250, 800, 250, 1130, 0, 150, 0...
## $ `75_to_79` <dbl> 150, 910, 1110, 890, 220, 640, 220, 970, 0, 110, 0, 7...
## $ `80_to_84` <dbl> 70, 540, 570, 500, 140, 380, 160, 530, 0, 80, 0, 380,...
## $ `85_to_89` <dbl> 40, 280, 300, 290, 100, 220, 90, 300, 0, 40, 0, 200, ...
## $ `90_and_over` <dbl> 0, 120, 140, 120, 40, 90, 40, 150, 0, 30, 0, 80, 50, ...
## $ TOTAL <dbl> 4960, 28400, 26920, 22680, 6800, 16890, 8150, 22700, ...
population20_tidy <- population20 %>%
group_by(PA, SZ, AG) %>%
summarise(`POP` = sum(`Pop`)) %>%
ungroup() %>%
pivot_wider(names_from = AG, values_from = POP) %>%
mutate(TOTAL = rowSums(.[3:21]))
glimpse(population20_tidy)
## Rows: 323
## Columns: 22
## $ PA <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio...
## $ SZ <chr> "Ang Mo Kio Town Centre", "Cheng San", "Chong Boon", ...
## $ `0_to_4` <dbl> 170, 1050, 840, 730, 200, 540, 220, 750, 0, 160, 0, 7...
## $ `10_to_14` <dbl> 330, 1060, 1020, 1020, 410, 680, 430, 930, 0, 230, 0,...
## $ `15_to_19` <dbl> 310, 1210, 1150, 1090, 460, 720, 490, 830, 0, 260, 0,...
## $ `20_to_24` <dbl> 290, 1380, 1380, 1170, 530, 860, 570, 930, 0, 320, 0,...
## $ `25_to_29` <dbl> 290, 1810, 1600, 1450, 510, 1030, 690, 1370, 0, 330, ...
## $ `30_to_34` <dbl> 280, 2010, 1930, 1400, 320, 1030, 490, 1370, 0, 240, ...
## $ `35_to_39` <dbl> 330, 2220, 1820, 1550, 290, 1010, 360, 1520, 0, 240, ...
## $ `40_to_44` <dbl> 430, 2050, 1900, 1700, 400, 1090, 450, 1700, 0, 250, ...
## $ `45_to_49` <dbl> 470, 2220, 1840, 1860, 540, 1170, 600, 1550, 0, 330, ...
## $ `5_to_9` <dbl> 260, 1000, 880, 840, 280, 590, 320, 910, 0, 170, 0, 9...
## $ `50_to_54` <dbl> 350, 2070, 1980, 1820, 540, 1260, 590, 1400, 0, 290, ...
## $ `55_to_59` <dbl> 350, 2160, 2150, 1800, 550, 1420, 700, 1630, 0, 350, ...
## $ `60_to_64` <dbl> 290, 2200, 2160, 1750, 480, 1240, 710, 1680, 0, 360, ...
## $ `65_to_69` <dbl> 270, 2150, 2080, 1700, 420, 1140, 520, 1590, 0, 250, ...
## $ `70_to_74` <dbl> 200, 1570, 1670, 1300, 300, 910, 330, 1310, 0, 170, 0...
## $ `75_to_79` <dbl> 140, 1020, 1190, 900, 240, 670, 220, 960, 0, 110, 0, ...
## $ `80_to_84` <dbl> 80, 580, 740, 590, 150, 400, 190, 610, 0, 80, 0, 420,...
## $ `85_to_89` <dbl> 40, 310, 400, 330, 100, 240, 100, 330, 0, 30, 0, 230,...
## $ `90_and_over` <dbl> 10, 150, 200, 140, 50, 120, 60, 180, 0, 30, 0, 100, 6...
## $ TOTAL <dbl> 4890, 28220, 26930, 23140, 6770, 16120, 8040, 21550, ...
Obtain population information on the number of children aged 0 to 6.
\[\small Percentage\ of\ children\ aged\ 5\ and\ 6\ in\ the\ age\ group\ 5\ to\ 9\ in\ 2017\] \[\small = \frac{(Number\ of\ births\ in\ 2011\ +\ Number\ of\ births\ in\ 2012)}{Number\ of\ births\ from\ 2008\ to\ 2012} \]
\[\small Percentage\ of\ children\ aged\ 5\ and\ 6\ in\ the\ age\ group\ 5\ to\ 9\ in\ 2019\] \[\small = \frac{(Number\ of\ births\ in\ 2013\ +\ Number\ of\ births\ in\ 2014)}{Number\ of\ births\ from\ 2010\ to\ 2014} \]
The function below obtains the number of births (per thousand population) in Singapore for a particular year.
get_birth <- function(yr, df) {
df1 <- df %>%
filter(year == yr)
df1$value
}
The code chunk below:
# obtain the percentage of children aged 5 and 6 in 2017
perc17 <- (get_birth(2011, birthrate) + get_birth(2012, birthrate)) / (get_birth(2008, birthrate) + get_birth(2009, birthrate) + get_birth(2010, birthrate) + get_birth(2011, birthrate) + get_birth(2012, birthrate))
# get 2017 population data for children aged 0 to 6 for each subzone
pop17 <- population17_tidy %>%
mutate(`5_to_6` = `5_to_9` * perc17) %>%
mutate(`0_to_6` = round(`0_to_4` + `5_to_6`)) %>%
dplyr::select(PA, SZ, `0_to_6`, TOTAL)
glimpse(pop17)
## Rows: 323
## Columns: 4
## $ PA <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "A...
## $ SZ <chr> "Ang Mo Kio Town Centre", "Cheng San", "Chong Boon", "Kebu...
## $ `0_to_6` <dbl> 310, 1552, 1218, 1004, 318, 854, 416, 1372, 0, 234, 0, 137...
## $ TOTAL <dbl> 4960, 28400, 26920, 22680, 6800, 16890, 8150, 22700, 0, 40...
# obtain the percentage of children aged 5 and 6 in 2020
perc20 <- (get_birth(2013, birthrate) + get_birth(2014, birthrate)) / (get_birth(2010, birthrate) + get_birth(2011, birthrate) + get_birth(2012, birthrate) + get_birth(2013, birthrate) + get_birth(2014, birthrate))
# get 2020 population data for children aged 0 to 6 for each subzone
pop20 <- population20_tidy %>%
mutate(`5_to_6` = `5_to_9` * perc20) %>%
mutate(`0_to_6` = round(`0_to_4` + `5_to_6`)) %>%
dplyr::select(PA, SZ, `0_to_6`, TOTAL)
glimpse(pop20)
## Rows: 323
## Columns: 4
## $ PA <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "A...
## $ SZ <chr> "Ang Mo Kio Town Centre", "Cheng San", "Chong Boon", "Kebu...
## $ `0_to_6` <dbl> 273, 1448, 1190, 1064, 311, 775, 347, 1112, 0, 228, 0, 115...
## $ TOTAL <dbl> 4890, 28220, 26930, 23140, 6770, 16120, 8040, 21550, 0, 42...
Join population attribute data to the spatial subzone data.
pop17_mutate <- pop17 %>%
mutate_at(.vars = vars(PA, SZ),
.funs = funs(toupper))
subzone17 <- left_join(subzone_sf3414, pop17_mutate, by = c('SUBZONE_N' = 'SZ'))
glimpse(subzone17)
## Rows: 323
## Columns: 19
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1...
## $ SUBZONE_NO <int> 2, 2, 3, 4, 5, 4, 10, 12, 4, 6, 1, 1, 3, 8, 3, 7, 9, 2, ...
## $ SUBZONE_N <chr> "PEOPLE'S PARK", "BUKIT MERAH", "CHINATOWN", "PHILLIP", ...
## $ SUBZONE_C <chr> "OTSZ02", "BMSZ02", "OTSZ03", "DTSZ04", "DTSZ05", "OTSZ0...
## $ CA_IND <chr> "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y", "Y", "...
## $ PLN_AREA_N <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN CORE", "DOW...
## $ PLN_AREA_C <chr> "OT", "BM", "OT", "DT", "DT", "OT", "BM", "DT", "BM", "D...
## $ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "C...
## $ REGION_C <chr> "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", "C...
## $ INC_CRC <chr> "B4120D23006C932A", "1C51019439A68700", "0FF1661344C84AE...
## $ FMEL_UPD_D <date> 2016-05-11, 2016-05-11, 2016-05-11, 2016-05-11, 2016-05...
## $ X_ADDR <dbl> 28831.78, 26360.80, 29153.97, 29706.72, 29968.62, 29509....
## $ Y_ADDR <dbl> 29419.65, 29384.14, 29158.04, 29744.91, 29572.76, 29646....
## $ SHAPE_Leng <dbl> 1822.1927, 3074.9632, 4297.5999, 871.5549, 1872.7522, 16...
## $ SHAPE_Area <dbl> 93140.44, 411722.82, 587222.68, 39437.94, 188767.49, 133...
## $ PA <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN CORE", "DOW...
## $ `0_to_6` <dbl> 10, 50, 982, 0, 0, 52, 1060, 0, 1184, 0, 0, 364, 0, 638,...
## $ TOTAL <dbl> 340, 1130, 11410, 0, 0, 1450, 12940, 0, 16250, 0, 0, 778...
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((29099.02 29..., MULTIPOLYGO...
pop20_mutate <- pop20 %>%
mutate_at(.vars = vars(PA, SZ),
.funs = funs(toupper))
subzone20 <- left_join(subzone_sf3414, pop20_mutate, by = c('SUBZONE_N' = 'SZ'))
glimpse(subzone20)
## Rows: 323
## Columns: 19
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1...
## $ SUBZONE_NO <int> 2, 2, 3, 4, 5, 4, 10, 12, 4, 6, 1, 1, 3, 8, 3, 7, 9, 2, ...
## $ SUBZONE_N <chr> "PEOPLE'S PARK", "BUKIT MERAH", "CHINATOWN", "PHILLIP", ...
## $ SUBZONE_C <chr> "OTSZ02", "BMSZ02", "OTSZ03", "DTSZ04", "DTSZ05", "OTSZ0...
## $ CA_IND <chr> "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y", "Y", "...
## $ PLN_AREA_N <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN CORE", "DOW...
## $ PLN_AREA_C <chr> "OT", "BM", "OT", "DT", "DT", "OT", "BM", "DT", "BM", "D...
## $ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "C...
## $ REGION_C <chr> "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", "C...
## $ INC_CRC <chr> "B4120D23006C932A", "1C51019439A68700", "0FF1661344C84AE...
## $ FMEL_UPD_D <date> 2016-05-11, 2016-05-11, 2016-05-11, 2016-05-11, 2016-05...
## $ X_ADDR <dbl> 28831.78, 26360.80, 29153.97, 29706.72, 29968.62, 29509....
## $ Y_ADDR <dbl> 29419.65, 29384.14, 29158.04, 29744.91, 29572.76, 29646....
## $ SHAPE_Leng <dbl> 1822.1927, 3074.9632, 4297.5999, 871.5549, 1872.7522, 16...
## $ SHAPE_Area <dbl> 93140.44, 411722.82, 587222.68, 39437.94, 188767.49, 133...
## $ PA <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN CORE", "DOW...
## $ `0_to_6` <dbl> 0, 36, 731, 0, 0, 32, 1027, 0, 906, 0, 0, 258, 0, 619, 8...
## $ TOTAL <dbl> 310, 1100, 10760, 0, 0, 1350, 13030, 0, 15430, 0, 0, 663...
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((29099.02 29..., MULTIPOLYGO...
The supply and demand of childcare services in 2017 and 2020 will be analysed and compared in this section.
The supply of childcare services is given by the location of childcare services, while the demand of childcare services is given by the number of potential beneficiaries (children aged 0 to 6) of childcare services.
Exploratory data analysis will be conducted, and choropleth mapping will be utilised to analyse the supply and demand of childcare services at the subzone level, in 2017 and 2020. It must be noted that normalised measures are required for choropleth maps. The normalised measures that will be utilised for choropleth mapping are described below.
The demand for childcare services in a subzone will be measured as the number of children aged 0 to 6 as a percentage of the population.
\[\small Demand\ for\ childcare\ in\ subzone = \frac{Number\ of\ children\ aged\ 0\ to\ 6\ in\ the\ subzone}{Total\ population\ of\ subzone}\]
The larger the value for demand, the higher the demand for childcare services in the subzone.
A larger demand value implies a higher percentage of children aged 0 to 6 in the subzone, who are the potential beneficiaries of childcare services.
The supply of childcare in a subzone will be measured as the number of childcare centres per square kilometer of the subzone.
\[\small Supply\ of\ childcare\ in\ subzone = \frac{Number\ of\ childcare\ centers\ in\ the\ subzone}{Total\ area\ of\ subzone\ (km^2)}\]
The larger the value for supply, the higher the supply of childcare services in the subzone.
A larger supply value implies that there are more childcare centers per square kilometer of the subzone to meet the demands of the subzone population.
Demand and supply can also be mapped as a ratio of the number of children aged 0 to 6, to the number of childcare centers in the subzone.
\[\small Children\ to\ Childcare\ Ratio\ of\ subzone = \frac{Number\ of\ children\ aged\ 0\ to\ 6\ at\ subzone}{Number\ of\ childcare\ centers\ at\ subzone}\]
This ratio is a measure of the number of children per childcare center in the subzone.
It provides a measure of whether a subzone is underserved or overserved in terms of childcare services.
The lower the children-to-childcare ratio of a subzone (except -1), the better the demand for childcare services is met in the subzone.
Note: Subzones with children aged 0 to 6 that have no childcare centers located in the subzone will be assigned a value of -1, to indicate that there is no childcare center available to serve the children living in the subzone.
Calculate the data required for choropleth mapping:
# Count number of childcare centers in each subzone (points in polygon)
subzone17$NUM_CHILDCARE <- lengths(st_intersects(subzone17, childcare17_sf3414))
subzone17_ddss <- subzone17 %>%
# Calculate demand. If there are no children, demand is 0.
mutate(DEMAND = ifelse(`0_to_6` != 0, `0_to_6` / TOTAL, 0)) %>%
# Calculate supply. If there are no childcare centers, supply is 0.
mutate(SUPPLY = ifelse(NUM_CHILDCARE != 0,
round(NUM_CHILDCARE / (SHAPE_Area/1000000), 0), 0)) %>%
# Calculate children-to-childcare ratio.
mutate(CHILD_CENTRE_RATIO = ifelse((NUM_CHILDCARE==0 & `0_to_6`>0), -1,
ifelse((NUM_CHILDCARE==0 & `0_to_6`==0), 0,
round((`0_to_6` / NUM_CHILDCARE), 0))))
glimpse(subzone17_ddss)
## Rows: 323
## Columns: 23
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...
## $ SUBZONE_NO <int> 2, 2, 3, 4, 5, 4, 10, 12, 4, 6, 1, 1, 3, 8, 3, 7...
## $ SUBZONE_N <chr> "PEOPLE'S PARK", "BUKIT MERAH", "CHINATOWN", "PH...
## $ SUBZONE_C <chr> "OTSZ02", "BMSZ02", "OTSZ03", "DTSZ04", "DTSZ05"...
## $ CA_IND <chr> "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y"...
## $ PLN_AREA_N <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN COR...
## $ PLN_AREA_C <chr> "OT", "BM", "OT", "DT", "DT", "OT", "BM", "DT", ...
## $ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REG...
## $ REGION_C <chr> "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", ...
## $ INC_CRC <chr> "B4120D23006C932A", "1C51019439A68700", "0FF1661...
## $ FMEL_UPD_D <date> 2016-05-11, 2016-05-11, 2016-05-11, 2016-05-11,...
## $ X_ADDR <dbl> 28831.78, 26360.80, 29153.97, 29706.72, 29968.62...
## $ Y_ADDR <dbl> 29419.65, 29384.14, 29158.04, 29744.91, 29572.76...
## $ SHAPE_Leng <dbl> 1822.1927, 3074.9632, 4297.5999, 871.5549, 1872....
## $ SHAPE_Area <dbl> 93140.44, 411722.82, 587222.68, 39437.94, 188767...
## $ PA <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN COR...
## $ `0_to_6` <dbl> 10, 50, 982, 0, 0, 52, 1060, 0, 1184, 0, 0, 364,...
## $ TOTAL <dbl> 340, 1130, 11410, 0, 0, 1450, 12940, 0, 16250, 0...
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((29099.02 29..., MUL...
## $ NUM_CHILDCARE <int> 0, 4, 6, 1, 2, 1, 3, 0, 2, 0, 0, 5, 0, 2, 1, 9, ...
## $ DEMAND <dbl> 0.02941176, 0.04424779, 0.08606486, 0.00000000, ...
## $ SUPPLY <dbl> 0, 10, 10, 25, 11, 8, 7, 0, 6, 0, 0, 9, 0, 3, 3,...
## $ CHILD_CENTRE_RATIO <dbl> -1, 12, 164, 0, 0, 52, 353, 0, 592, 0, 0, 73, 0,...
# Count number of childcare centers in each subzone (points in polygon)
subzone20$NUM_CHILDCARE <- lengths(st_intersects(subzone20, childcare20_sf3414))
subzone20_ddss <- subzone20 %>%
# Calculate demand. If there are no children, demand is 0.
mutate(DEMAND = ifelse(`0_to_6` != 0, `0_to_6` / TOTAL, 0)) %>%
# Calculate supply. If there are no childcare centers, supply is 0.
mutate(SUPPLY = ifelse(NUM_CHILDCARE != 0,
round(NUM_CHILDCARE / (SHAPE_Area/1000000), 0), 0)) %>%
# Calculate children-to-childcare ratio.
mutate(CHILD_CENTRE_RATIO = ifelse((NUM_CHILDCARE==0 & `0_to_6`>0), -1,
ifelse((NUM_CHILDCARE==0 & `0_to_6`==0), 0,
round((`0_to_6` / NUM_CHILDCARE), 0))))
glimpse(subzone20_ddss)
## Rows: 323
## Columns: 23
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...
## $ SUBZONE_NO <int> 2, 2, 3, 4, 5, 4, 10, 12, 4, 6, 1, 1, 3, 8, 3, 7...
## $ SUBZONE_N <chr> "PEOPLE'S PARK", "BUKIT MERAH", "CHINATOWN", "PH...
## $ SUBZONE_C <chr> "OTSZ02", "BMSZ02", "OTSZ03", "DTSZ04", "DTSZ05"...
## $ CA_IND <chr> "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y"...
## $ PLN_AREA_N <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN COR...
## $ PLN_AREA_C <chr> "OT", "BM", "OT", "DT", "DT", "OT", "BM", "DT", ...
## $ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REG...
## $ REGION_C <chr> "CR", "CR", "CR", "CR", "CR", "CR", "CR", "CR", ...
## $ INC_CRC <chr> "B4120D23006C932A", "1C51019439A68700", "0FF1661...
## $ FMEL_UPD_D <date> 2016-05-11, 2016-05-11, 2016-05-11, 2016-05-11,...
## $ X_ADDR <dbl> 28831.78, 26360.80, 29153.97, 29706.72, 29968.62...
## $ Y_ADDR <dbl> 29419.65, 29384.14, 29158.04, 29744.91, 29572.76...
## $ SHAPE_Leng <dbl> 1822.1927, 3074.9632, 4297.5999, 871.5549, 1872....
## $ SHAPE_Area <dbl> 93140.44, 411722.82, 587222.68, 39437.94, 188767...
## $ PA <chr> "OUTRAM", "BUKIT MERAH", "OUTRAM", "DOWNTOWN COR...
## $ `0_to_6` <dbl> 0, 36, 731, 0, 0, 32, 1027, 0, 906, 0, 0, 258, 0...
## $ TOTAL <dbl> 310, 1100, 10760, 0, 0, 1350, 13030, 0, 15430, 0...
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((29099.02 29..., MUL...
## $ NUM_CHILDCARE <int> 0, 3, 9, 1, 3, 0, 4, 0, 3, 1, 0, 4, 0, 3, 1, 8, ...
## $ DEMAND <dbl> 0.00000000, 0.03272727, 0.06793680, 0.00000000, ...
## $ SUPPLY <dbl> 0, 7, 15, 25, 16, 0, 9, 0, 9, 4, 0, 7, 0, 5, 3, ...
## $ CHILD_CENTRE_RATIO <dbl> 0, 12, 81, 0, 0, -1, 257, 0, 302, 0, 0, 64, 0, 2...
Summary statistics of demand for childcare services in subzones
summary(subzone17_ddss$DEMAND)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.05114 0.04683 0.06882 0.18537
Box plot to visualise extreme values
ggplot(subzone17_ddss, aes(x = '', y = DEMAND)) +
geom_boxplot() +
labs(x='', y='Demand') +
theme_minimal()
Histogram visualising the distribution of demand for childcare services in subzones
ggplot(subzone17_ddss, aes(x = DEMAND)) +
geom_histogram(fill = 'darksalmon') +
labs(title = 'Distribution of demand for childcare services in subzones',
x = 'Demand for childcare services',
y = 'Frequency') +
theme_minimal()
Top 10 subzones with highest demand for childcare services
top10_demand_17 = top_n(subzone17_ddss, 10, DEMAND)
ggplot(top10_demand_17, aes(x=DEMAND, y=reorder(SUBZONE_N, DEMAND), label=round(DEMAND, 2))) +
geom_col(fill='darksalmon') +
labs(title='Top 10 subzones with highest demand for childcare',
x='Demand',
y='Subzone') +
geom_text(nudge_x=0.01, colour='gray23', size=3.5) +
theme_minimal()
Box map
A box map will be used to visualise demand spatially across different subzones in Singapore.
The following code chunks are functions to construct the box map.
# To create break points for box map
boxbreaks <- function(v, mult=1.5) {
qv <- unname(quantile(v))
iqr <- qv[4] - qv[2]
# upfence and lofence define the area where points will be defined as outliers
upfence <- qv[4] + mult * iqr
lofence <- qv[2] - mult * iqr
# initialize break points vector
bb <- vector(mode="numeric",length=7)
# logic for lower and upper fences
if (lofence < qv[1]) { # no lower outliers
bb[1] <- lofence
bb[2] <- floor(qv[1])
} else {
bb[2] <- lofence
bb[1] <- qv[1]
}
if (upfence > qv[5]) { # no upper outliers
bb[7] <- upfence
bb[6] <- ceiling(qv[5])
} else {
bb[6] <- upfence
bb[7] <- qv[5]
}
bb[3:5] <- qv[2:4]
return(bb)
}
# Extract variable as vector out of sf dataframe
get.var <- function(vname, df) {
v <- df[vname] %>% st_set_geometry(NULL)
v <- unname(v[,1])
return(v)
}
# Boxmap function
boxmap <- function(vnam, df, mtitle, legtitle=NA, mult=1.5, palette='-RdBu') {
df1 <- drop_na(df)
var <- get.var(vnam,df1)
bb <- boxbreaks(var)
tm_shape(df) +
tm_fill(vnam,
title=legtitle,
breaks=bb,
palette=palette,
labels = c("Lower outlier", "< 25%", "25% - 50%", "50% - 75%","> 75%", "Upper outlier")) +
tm_borders(lwd=0.1, alpha=1) +
tm_layout(main.title = mtitle,
main.title.position = 'center',
main.title.size = 1,
frame = FALSE) +
tm_scale_bar(width = 0.15)
}
# Boxmap function with points overlayed on top of choropleth map
boxmap_pts <- function(vnam, df, pointdf, mtitle, legtitle=NA, mult=1.5, palette='-RdBu') {
boxmap(vnam, df, mtitle, legtitle=legtitle, mult=mult, palette=palette) +
tm_shape(pointdf) +
tm_dots(col="gray23")
}
Demand for childcare services in 2017
dd_boxmap17 <- boxmap("DEMAND", subzone17_ddss, mtitle="Demand of Childcare Services in 2017")
dd_boxmap17
Overlay locations of childcare services on top of the box map
boxmap_pts("DEMAND", subzone17_ddss, childcare17_sf3414, mtitle='Demand and Location of Childcare Services in 2017')
Summary statistics of supply of childcare services in subzones
summary(subzone17_ddss$SUPPLY)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 3.000 3.916 6.500 28.000
Box plot to visualise extreme values
ggplot(subzone17_ddss, aes(x = '', y = SUPPLY)) +
geom_boxplot() +
labs(x='', y='Supply') +
theme_minimal()
Histogram visualising the distribution of supply for childcare services in subzones.
ggplot(subzone17_ddss, aes(x = SUPPLY)) +
geom_histogram(fill = 'darkseagreen4') +
labs(title = 'Distribution of supply for childcare services in subzones',
x = 'Supply for childcare services',
y = 'Frequency') +
theme_minimal()
Top 10 subzones with highest supply of childcare services
top10_supply_17 = top_n(subzone17_ddss, 10, SUPPLY)
ggplot(top10_supply_17, aes(x=SUPPLY, y=reorder(SUBZONE_N, SUPPLY), label=round(SUPPLY, 2))) +
geom_col(fill='darkseagreen4') +
labs(title='Top 10 subzones with highest supply of childcare',
x='Supply',
y='Subzone') +
geom_text(nudge_x=0.8, colour='gray23', size=3.5) +
theme_minimal()
Box map
Similar to how demand was visualised, a box map will be utilised to visualise supply spatially across different subzones in Singapore. The box map will enable statistical interpretation of outliers and better identification of subzones that have relatively higher or lower supply compared to the rest of the subzones in Singapore.
Supply of childcare services in 2017
ss_boxmap17 <- boxmap("SUPPLY", subzone17_ddss, mtitle="Supply of Childcare Services in 2017")
ss_boxmap17
The children-to-childcare ratio is a measure of both demand and supply. The ratio is a measure of the population of children aged 0 to 6 per childcare center available in the subzone, providing an idea of whether a subzone is underserved or overserved.
To understand whether there are sufficient childcare centers to cater to the demand of the subzone population, the capacity of childcare centers have to be considered in union with the children-to-childcare ratio.
\[\small Average\ childcare\ enrolment\ capacity = \frac{Total\ capacity\ in\ childcare\ centres\ in\ Singapore}{Total\ number\ of\ childcare\ centres\ in Singapore}\]
\[\small Average\ childcare\ enrolment\ capacity = \frac{165,919}{1,486} = 112\]
Summary statistics of children-to-childcare ratio
summary(subzone17_ddss$CHILD_CENTRE_RATIO)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.0 0.0 78.0 135.4 214.0 952.0
Box plot to visualise extreme values for children-to-childcare ratio
ggplot(subzone17_ddss, aes(x = '', y = CHILD_CENTRE_RATIO)) +
geom_boxplot() +
labs(x='', y='Children-To-Childcare Ratio') +
theme_minimal()
Histogram visualising the distribution of children-to-childcare ratio in subzones
ggplot(subzone17_ddss, aes(x = CHILD_CENTRE_RATIO)) +
geom_histogram(fill = 'thistle4') +
labs(title = 'Distribution of children-to-childcare ratio in subzones',
x = 'Children-To-Childcare Ratio',
y = 'Frequency') +
theme_minimal()
Top 10 underserved subzones: highest children-to-childcare ratio
top10_ratio_17 = top_n(subzone17_ddss, 10, CHILD_CENTRE_RATIO)
ggplot(top10_ratio_17, aes(x=CHILD_CENTRE_RATIO, y=reorder(SUBZONE_N, CHILD_CENTRE_RATIO), label=round(CHILD_CENTRE_RATIO))) +
geom_col(fill='thistle4') +
labs(title='Top 10 subzones with highest children-to-childcare ratio',
x='Children-To-Childcare Ratio',
y='Subzone') +
geom_text(nudge_x=45, colour='gray23', size=3.5) +
theme_minimal()
Choropleth Map
To better understand how the demand for childcare services in a subzone is matched by supply, a customised classification scheme will be used to visualise the chilren-to-childcare ratio spatially across subzones in Singapore.
The values of the children-to-childcare ratio are classified according to the following scheme:
The function below creates the choropleth map with the custom classification scheme.
choropleth_ratio <- function(df, mtitle='Children-to-childcare Ratio', palette='-RdGy') {
df$category[df$CHILD_CENTRE_RATIO == -1] = "-1"
df$category[df$CHILD_CENTRE_RATIO == 0] = "0"
df$category[df$CHILD_CENTRE_RATIO > 0 & df$CHILD_CENTRE_RATIO <= 112] = "0 - 112"
df$category[df$CHILD_CENTRE_RATIO > 112] = "Above 112"
tm_shape(df) +
tm_fill('category',
palette = palette,
title = 'CHILDREN-TO-CHILDCARE\nRATIO') +
tm_borders(lwd = 0.1,
alpha = 1) +
tm_layout(main.title = mtitle,
main.title.position = 'center',
main.title.size = 1,
frame = FALSE) +
tm_scale_bar(width = 0.15)
}
Children-to-childcare ratio for childcare services in 2017
ratio_choropleth17 <- choropleth_ratio(subzone17_ddss, mtitle = 'Children-To-Childcare Ratio in 2017')
ratio_choropleth17
Overlay locations of childcare services on top of the choropleth map
choropleth_ratio(subzone17_ddss, mtitle = 'Childcare Locations and Children-To-Childcare Ratio in 2017') +
tm_shape(childcare17_sf3414) +
tm_dots()
Box Map
To conduct more statistically sound comparisons across subzones in Singapore, a box map is utilised to enable statistical interpretation of data.
ratio_boxmap17 <- boxmap("CHILD_CENTRE_RATIO", subzone17_ddss, mtitle="Children-To-Childcare Ratio in 2017", legtitle = 'CHILDREN-TO-CHILDCARE\nRATIO')
ratio_boxmap17
Similar to the analysis for childcare services in 2017, the same will be conducted for childcare services in 2020. Differences in observations between 2017 and 2020 will then be compared.
Summary statistics of demand for childcare services in subzones
summary(subzone20_ddss$DEMAND)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.04769 0.04361 0.06503 0.16909
Box plot to visualise extreme values
ggplot(subzone20_ddss, aes(x = '', y = DEMAND)) +
geom_boxplot() +
labs(x='', y='Demand') +
theme_minimal()
Histogram visualising the distribution of demand for childcare services in subzones
ggplot(subzone20_ddss, aes(x = DEMAND)) +
geom_histogram(fill = 'darksalmon') +
labs(title = 'Distribution of demand for childcare services in subzones',
x = 'Demand for childcare services',
y = 'Frequency') +
theme_minimal()
Top 10 subzones with highest demand for childcare services
top10_demand_20 = top_n(subzone20_ddss, 10, DEMAND)
ggplot(top10_demand_20, aes(x=DEMAND, y=reorder(SUBZONE_N, DEMAND), label=round(DEMAND, 2))) +
geom_col(fill='darksalmon') +
labs(title='Top 10 subzones with highest demand for childcare',
x='Demand',
y='Subzone') +
geom_text(nudge_x=0.01, colour='gray23', size=3.5) +
theme_minimal()
Box map
A box map will be used to visualise demand spatially across different subzones in Singapore.
Demand for childcare services in 2020
dd_boxmap20 <- boxmap("DEMAND", subzone20_ddss, mtitle="Demand of Childcare Services in 2020")
dd_boxmap20
Overlay locations of childcare services on top of the box map
boxmap_pts("DEMAND", subzone20_ddss, childcare20_sf3414, mtitle='Demand and Location of Childcare Services in 2020')
Visualise and compare demand for childcare services in 2017 and 2020
To better compare changes in demand for childcare services between 2017 and 2020, the box map for demand of childcare services in 2017 and 2020 are placed side-by-side.
tmap_arrange(dd_boxmap17, dd_boxmap20, ncol=2)
Summary statistics of supply of childcare services in subzones
summary(subzone20_ddss$SUPPLY)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 3.000 4.601 8.000 36.000
Box plot to visualise extreme values
ggplot(subzone20_ddss, aes(x = '', y = SUPPLY)) +
geom_boxplot() +
labs(x='', y='Supply') +
theme_minimal()
Histogram visualising the distribution of supply for childcare services in subzones.
ggplot(subzone20_ddss, aes(x = SUPPLY)) +
geom_histogram(fill = 'darkseagreen4') +
labs(title = 'Distribution of supply for childcare services in subzones',
x = 'Supply for childcare services',
y = 'Frequency') +
theme_minimal()
Top 10 subzones with highest supply of childcare services
top10_supply_20 = top_n(subzone20_ddss, 10, SUPPLY)
ggplot(top10_supply_20, aes(x=SUPPLY, y=reorder(SUBZONE_N, SUPPLY), label=round(SUPPLY, 2))) +
geom_col(fill='darkseagreen4') +
labs(title='Top 10 subzones with highest supply of childcare',
x='Supply',
y='Subzone') +
geom_text(nudge_x=0.8, colour='gray23', size=3.5) +
theme_minimal()
Box map
Similar to how demand was visualised, a box map will be utilised to visualise supply spatially across different subzones in Singapore.
Supply of childcare services in 2020
ss_boxmap20 <- boxmap("SUPPLY", subzone20_ddss, mtitle="Supply of Childcare Services in 2020")
ss_boxmap20
Visualise and compare supply of childcare services in 2017 and 2020
To better compare changes in supply for childcare services between 2017 and 2020, the box map for supply of childcare services in 2017 and 2020 are placed side-by-side.
tmap_arrange(ss_boxmap17, ss_boxmap20, ncol=2)
Similar to the analysis for 2017 data, the children-to-childcare ratio will be utilised to analyse demand and supply of childcare services. Average childcare capacity in Singapore is taken to be 112 children per centre.
Summary statistics of children-to-childcare ratio
summary(subzone20_ddss$CHILD_CENTRE_RATIO)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.0 0.0 87.0 112.5 183.0 803.0
Box plot to visualise extreme values for children-to-childcare ratio
ggplot(subzone20_ddss, aes(x = '', y = CHILD_CENTRE_RATIO)) +
geom_boxplot() +
labs(x='', y='Children-To-Childcare Ratio') +
theme_minimal()
Histogram visualising the distribution of children-to-childcare ratio in subzones
ggplot(subzone20_ddss, aes(x = CHILD_CENTRE_RATIO)) +
geom_histogram(fill = 'thistle4') +
labs(title = 'Distribution of children-to-childcare ratio in subzones',
x = 'Children-To-Childcare Ratio',
y = 'Frequency') +
theme_minimal()
Top 10 underserved subzones: highest children-to-childcare ratio
top10_ratio_20 = top_n(subzone20_ddss, 10, CHILD_CENTRE_RATIO)
ggplot(top10_ratio_20, aes(x=CHILD_CENTRE_RATIO, y=reorder(SUBZONE_N, CHILD_CENTRE_RATIO), label=round(CHILD_CENTRE_RATIO))) +
geom_col(fill='thistle4') +
labs(title='Top 10 subzones with highest children-to-childcare ratio',
x='Children-To-Childcare Ratio',
y='Subzone') +
geom_text(nudge_x=45, colour='gray23', size=3.5) +
theme_minimal()
Choropleth Map
To better understand how the demand for childcare services in a subzone is matched by supply, a customised classification scheme will be used to visualise the chilren-to-childcare ratio spatially across subzones in Singapore.
The values of the children-to-childcare ratio are classified according to the following scheme:
Children-to-childcare ratio for childcare services in 2020
ratio_choropleth20 <- choropleth_ratio(subzone20_ddss, mtitle = 'Children-To-Childcare Ratio in 2020')
ratio_choropleth20
Compare children-to-childcare ratio in 2017 and 2020
The choropleth map for chilren-to-childcare ratio for 2017 and 2020 are placed side-by-side for comparison.
tmap_arrange(ratio_choropleth17, ratio_choropleth20, ncol=2)
While there are still many subzones that are underserved (deep red), there are now less subzones that are underserved in 2020 than in 2017, especially in the Northeast, East and West regions of Singapore. This can be observed by a lesser number of subzones in deep red in the 2020 map than in the 2017 map. Furthermore, in the Central region, it can be observed that many subzones which originally did not have any childcare services to meet its demand (dark grey) in 2017, now has childcare centres built in the subzones, albeit still being underserved (red).
This suggests that there is a general improvement from 2017, in the gap between demand and supply of childcare services in subzones. The demand and supply gap for some of the subzones have been bridged either by a decrease in demand for childcare services (due to a decrease in number of children aged 0 to 6), or by an increase in demand of childcare services (where more childcare centres have been built). Children that had been limited in opportunities to utilise childcare services in 2017 now have more opportunities to utilise them.
Box Map
To conduct more statistically sound comparisons across subzones in Singapore, a box map is utilised to enable statistical interpretation of data.
ratio_boxmap20 <- boxmap("CHILD_CENTRE_RATIO", subzone20_ddss, mtitle="Children-To-Childcare Ratio in 2020", legtitle = 'CHILDREN-TO-CHILDCARE\nRATIO')
ratio_boxmap20
Compare children-to-childcare ratio in 2017 and 2020 with box map
tmap_arrange(ratio_boxmap17, ratio_boxmap20, ncol=2)
In this section, temporal changes of childcare services from 2017 to 2020 will be analysed spatially.
Calculate the difference in number of childcare services for each subzone
subzone1720 <- st_join(subzone17, subzone20,
join = st_equals,
suffix = c('_2017', '_2020')) %>%
mutate(DIFF_CHILDCARE = NUM_CHILDCARE_2020 - NUM_CHILDCARE_2017)
summary(subzone1720$DIFF_CHILDCARE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.0000 0.0000 0.0000 0.7214 1.0000 10.0000
The function below creates a dot distribution map.
dotdistmap <- function(polygon_df, point_df, mtitle) {
tm_shape(polygon_df) +
tm_fill(col = 'gray94') +
tm_borders(col = 'gray28', lwd = 0.1, alpha = 0.3) +
tm_shape(point_df) +
tm_dots(col = 'tomato3') +
tm_layout(frame.lwd = 0.1,
frame = 'gray31',
main.title = mtitle,
main.title.position = 'center',
main.title.size = 1.5,
main.title.color = 'gray28')
}
Childcare services in 2017 and 2020
tmap_arrange(dotdistmap(subzone_sf3414, childcare17_sf3414, '2017'),
dotdistmap(subzone_sf3414, childcare20_sf3414, '2020'),
ncol = 2)
To better visualise the changes in childcare services across subzones, childcare location points for 2017 and 2020 were overlayed on top of each other.
Childcare services in 2017 and 2020
pointmap <- tm_shape(subzone_sf3414) +
tm_fill(col = 'gray94') +
tm_borders(col = 'gray28', lwd = 0.1, alpha = 0.3) +
tm_shape(childcare17_sf3414) +
tm_symbols(size = 0.1,
col = 'red1',
alpha = 0.5,
border.lwd = NA) +
tm_shape(childcare20_sf3414) +
tm_symbols(size = 0.1,
col = 'yellow1',
alpha = 0.5,
border.lwd = NA) +
tm_add_legend(type = 'symbol',
title = 'Childcare',
labels = c('2017', '2020'),
col = c('red1', 'yellow1'),
alpha = 0.7,
border.lwd = 0.01) +
tm_layout(main.title = 'Childcare Services in 2017 and 2020',
main.title.position = 'center',
main.title.size = 1,
frame = FALSE) +
tm_scale_bar(width = 0.15)
pointmap
To analyse the change in childcare services from 2017 to 2020 at a subzone level, a choropleth map will be constructed to find out which subzones increased or decreased its number of childcare services offered. There will be three categories: (1) Increased, (2) No Change, (3) Decreased.
Construct choropleth map of change in childcare services
subzone1720$diff_category[subzone1720$DIFF_CHILDCARE < 0] = "Decreased"
subzone1720$diff_category[subzone1720$DIFF_CHILDCARE == 0] = "No Change"
subzone1720$diff_category[subzone1720$DIFF_CHILDCARE > 0] = "Increased"
change_choropleth <- tm_shape(subzone1720) +
tm_fill('diff_category',
palette = c('thistle3', 'darkseagreen', 'gray90'),
title = 'CHANGE') +
tm_borders(lwd = 0.1,
alpha = 1) +
tm_layout(main.title = "Change in Number of Childcare Services\nFrom 2017 to 2020",
main.title.position = 'center',
main.title.size = 1,
frame = FALSE) +
tm_scale_bar(width = 0.15)
change_choropleth
To better assess whether changes in childcare services between 2017 and 2020 correspond to the demand of childcare services in subzones, the changes will be compared with demand at a subzone level.
Compare changes in childcare services with demand in 2017
tmap_arrange(change_choropleth, dd_boxmap17, ncol=2)
Compare changes in childcare services with demand in 2020
tmap_arrange(change_choropleth, dd_boxmap20, ncol=2)
In conclusion, the analysis and comparison of the demand and supply and childcare services in 2017 and 2020 revealed that there have been improvements in increasing the supply of childcare centres to meet the high demand of subzones. However, more can be done to bridge the gap in many of the subzones in Singapore which are still underserved in terms of childcare services.
This section analyses the spatial point pattern of childcare services in Singapore.
Visualise childcare locations in 2017 and 2020 using a point symbol map.
The following function creates the point symbol map.
point_symbol_map <- function(polygon_df, point_df, mtitle)
tm_shape(polygon_df) +
tm_fill(col = 'gray94') +
tm_borders(col = 'gray28', lwd = 0.1, alpha = 0.3) +
tm_shape(point_df) +
tm_bubbles(col = 'tomato3',
size = 0.1,
alpha = 0.8,
border.col = 'gray31',
border.lwd = 0.1) +
tm_layout(frame = FALSE,
main.title = mtitle,
main.title.position = 'center',
main.title.size = 1) +
tm_scale_bar(width = 0.15)
point_symbol_map(subzone_sf3414, childcare17_sf3414, 'Childcare Locations in 2017')
point_symbol_map(subzone_sf3414, childcare20_sf3414, 'Childcare Locations in 2020')
In order to statistically verify the above observations that the locations of childcare centres are not randomly distributed and whether there is clustering, second order point pattern analysis will be conducted.
Due to the intensive computational power required to conduct analysis on the whole of Singapore, analysis will be conducted on selected planning areas of Singapore (highlighted in pink in the map below). These study areas are:
subzone_sf3414$STUDY_AREA[(subzone_sf3414$PLN_AREA_N=='SENGKANG') | (subzone_sf3414$PLN_AREA_N=='BEDOK') | (subzone_sf3414$PLN_AREA_N=='BUKIT BATOK') | (subzone_sf3414$PLN_AREA_N=='HOUGANG')] = "Y"
subzone_sf3414$STUDY_AREA[is.na(subzone_sf3414$STUDY_AREA)] = 'N'
tm_shape(subzone_sf3414) +
tm_fill('STUDY_AREA',
palette = c('grey90', 'rosybrown2')) +
tm_borders(col = 'gray28', lwd = 0.1, alpha = 0.3) +
tm_layout(legend.show = FALSE,
frame = FALSE)
Convert and prepare data into an appropriate format for spatial point pattern analysis.
childcare17_ppp <- childcare17_sf3414 %>%
as('Spatial') %>%
as('SpatialPoints') %>%
as('ppp')
summary(childcare17_ppp)
## Planar point pattern: 1312 points
## Average intensity 1.623186e-06 points per square unit
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 3 decimal places
## i.e. rounded to the nearest multiple of 0.001 units
##
## Window: rectangle = [11203.01, 45404.24] x [25667.6, 49300.88] units
## (34200 x 23630 units)
## Window area = 808287000 square units
childcare20_ppp <- childcare20_sf3414 %>%
as('Spatial') %>%
as('SpatialPoints') %>%
as('ppp')
summary(childcare20_ppp)
## Planar point pattern: 1545 points
## Average intensity 1.91145e-06 points per square unit
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 3 decimal places
## i.e. rounded to the nearest multiple of 0.001 units
##
## Window: rectangle = [11203.01, 45404.24] x [25667.6, 49300.88] units
## (34200 x 23630 units)
## Window area = 808287000 square units
Duplicated points in the childcare data must be handled, as the statistical methodology used assumes simple spatial point processes, hence points cannot be coincidental.
any(duplicated(childcare17_ppp))
## [1] TRUE
sum(multiplicity(childcare17_ppp) > 1)
## [1] 85
any(duplicated(childcare20_ppp))
## [1] TRUE
sum(multiplicity(childcare20_ppp) > 1)
## [1] 128
childcare17_ppp_jit <- rjitter(childcare17_ppp,
retry = TRUE,
nsim = 1,
drop = TRUE)
sum(multiplicity(childcare17_ppp_jit) > 1)
## [1] 0
childcare20_ppp_jit <- rjitter(childcare20_ppp,
retry = TRUE,
nsim = 1,
drop = TRUE)
sum(multiplicity(childcare20_ppp_jit) > 1)
## [1] 0
The analysis of spatial point patterns will be will be confined to four geographical areas of Singapore: Sengkang, Hougang, Bedok and Bukit Batok.
Function to extract study area and convert to owin object.
get_owin <- function(subzone, pln_area_n) {
subzone[subzone$PLN_AREA_N == pln_area_n,] %>%
as('Spatial') %>%
as('SpatialPolygons') %>%
as('owin')
}
Create owin objects for study areas.
sengkang_owin <- get_owin(subzone_sf3414, "SENGKANG")
sengkang_owin
## window: polygonal boundary
## enclosing rectangle: [30122.19, 37368.16] x [39760.83, 42585.83] units
hougang_owin <- get_owin(subzone_sf3414, "HOUGANG")
hougang_owin
## window: polygonal boundary
## enclosing rectangle: [32428.23, 36538.74] x [35083.89, 41180.3] units
bedok_owin <- get_owin(subzone_sf3414, "BEDOK")
bedok_owin
## window: polygonal boundary
## enclosing rectangle: [34995.38, 42502.87] x [31574.68, 36745.39] units
bukitbatok_owin <- get_owin(subzone_sf3414, "BUKIT BATOK")
bukitbatok_owin
## window: polygonal boundary
## enclosing rectangle: [17231.102, 20996.085] x [34961.52, 40182.48] units
subzone_owin <- subzone_sf3414 %>%
as('Spatial') %>%
as('SpatialPolygons') %>%
as('owin')
subzone_owin
## window: polygonal boundary
## enclosing rectangle: [2667.54, 56396.44] x [15748.72, 50256.33] units
Combine the childcare points with the study area.
childcare17_sengkang_ppp <- childcare17_ppp_jit[sengkang_owin]
childcare17_hougang_ppp <- childcare17_ppp_jit[hougang_owin]
childcare17_bedok_ppp <- childcare17_ppp_jit[bedok_owin]
childcare17_bukitbatok_ppp <- childcare17_ppp_jit[bukitbatok_owin]
childcare20_sengkang_ppp <- childcare20_ppp_jit[sengkang_owin]
childcare20_hougang_ppp <- childcare20_ppp_jit[hougang_owin]
childcare20_bedok_ppp <- childcare20_ppp_jit[bedok_owin]
childcare20_bukitbatok_ppp <- childcare20_ppp_jit[bukitbatok_owin]
childcare17_sg_ppp <- childcare17_ppp_jit[subzone_owin]
childcare20_sg_ppp <- childcare20_ppp_jit[subzone_owin]
Visualise ppp objects to ensure that there are no errors in data preparation and conversion.
For childcare services in 2017:
par(mfrow=c(2,2))
plot(childcare17_sengkang_ppp, main = 'Sengkang')
plot(childcare17_hougang_ppp, main = 'Hougang')
plot(childcare17_bedok_ppp, main = 'Bedok')
plot(childcare17_bukitbatok_ppp, main = 'Bukit Batok')
mtext('Childcare Services in 2017', outer=TRUE, side=3, line=-12)
For childcare services in 2020:
par(mfrow=c(2,2))
plot(childcare20_sengkang_ppp, main = 'Sengkang')
plot(childcare20_hougang_ppp, main = 'Hougang')
plot(childcare20_bedok_ppp, main = 'Bedok')
plot(childcare20_bukitbatok_ppp, main = 'Bukit Batok')
mtext('Childcare Services in 2020', outer=TRUE, side=3, line=-12)
Hypothesis testing will be conducted utilising second-order statistics (L function), to assess if the observed point pattern for childcare services (in 2017 and 2020) is significantly different from a homogeneous Poisson process (Complete Spatial Randomness, CSR).
The general K function is given by:
\[K(r) = \frac{E[N_0(r)]}{\lambda}\] Where,
The L function is a variance-stabilising transformation of the K function:
\[L(r) = \sqrt{\frac{K(r)}{\pi}}\]
Methodology and Interpretation
The function below computes and plots the L function estimate for a point pattern. Ripley edge correction is used to account for edge effects.
l_estimate <- function(ppp) {
l_est <- Lest(ppp, correction = 'Ripley')
plot(l_est,
. -r ~ r,
ylab = 'L(r) - r',
xlab = 'r (metres)',
main = 'L-Estimate')
}
The function below conducts the Monte Carlo test for CSR with L function, and plots the results.
l_montecarlo <- function(ppp, mtitle) {
# Set seed such that same results will be obtained from MC test every time
set.seed(1234)
l_mc <- envelope(ppp, Lest, nsim = 199)
l_data <- as.data.frame(l_mc)
l_data <- l_data[-1,]
colour <- c("#d73027", "#ffffbf", "#91bfdb")
test <- ggplot(l_data, aes(x=r, y=obs-r))+
# plot observed value
geom_line(colour=c("#4d4d4d"), aes(text = sprintf('L(r) - r: %f \nr: %f', obs-r, r), group=1))+
# plot simulation envelopes
geom_ribbon(aes(ymin=(lo-r),ymax=(hi-r), alpha=0.1, colour=c("#e0e0e0"))) +
xlab("Distance, r (metres)") +
ylab("L(r) - r") +
# plot expected value, which is equal to 0
geom_hline(yintercept=0, linetype = "dashed", colour=c("#800000")) +
# plot 'Quantums'
geom_rug(data=l_data[(l_data$obs-l_data$r) > (l_data$hi-l_data$r),], sides="b", colour=colour[1]) +
geom_rug(data=l_data[(l_data$obs-l_data$r) < (l_data$lo-l_data$r),], sides="b", colour=colour[2]) +
geom_rug(data=l_data[(l_data$obs-l_data$r) >= (l_data$lo-l_data$r) & (l_data$obs-l_data$r) <= (l_data$hi-l_data$r),], sides="b", color=colour[3]) +
# turn off all legends
theme(legend.position="none") +
ggtitle(sprintf('Childcare Services in %s: L(r) - r with Randomisation Envelope', mtitle))
ggplotly(test, tooltip = "text")
}
Compute L Function estimate
l_estimate(childcare17_sengkang_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Sengkang in 2017 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare17_sengkang_ppp, 'Sengkang')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
Compute L Function estimate
l_estimate(childcare20_sengkang_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Sengkang in 2020 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare20_sengkang_ppp, 'Sengkang')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
Compute L Function estimate
l_estimate(childcare17_hougang_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Hougang in 2017 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare17_hougang_ppp, 'Hougang')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
Compute L Function estimate
l_estimate(childcare20_hougang_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Hougang in 2020 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare20_hougang_ppp, 'Hougang')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
Compute L Function estimate
l_estimate(childcare17_bedok_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Bedok in 2017 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare17_bedok_ppp, 'Bedok')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
Compute L Function estimate
l_estimate(childcare20_bedok_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Bedok in 2020 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare20_bedok_ppp, 'Bedok')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
Compute L Function estimate
l_estimate(childcare17_bukitbatok_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Bukit Batok in 2017 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare17_bukitbatok_ppp, 'Bukit Batok')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
Compute L Function estimate
l_estimate(childcare20_bukitbatok_ppp)
To confirm the observed spatial pattern and assess if the distribution of childcare services at Bukit Batok in 2020 is significantly different from a homogeneous Poisson process, conduct Monte Carlo test with L function to test the hypothesis.
Monte Carlo Test with L Function
l_montecarlo(childcare20_bukitbatok_ppp, 'Bukit Batok')
## Generating 199 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.40
## .42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78.80
## .82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.118.120
## .122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156.158.160
## .162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.196.198 199.
##
## Done.
The second-order analysis of the spatial point pattern of childcare services reveals that there are certain subzones in Singapore with statistically significant clustering of childcare services. This can signify strategic planning of childcare services provision by the government or the individual service providers. More can be done to understand the accessibility of subzone populations to areas with high concentration of childcare services.
Kernel density estimation (KDE) will be utilised to visualise the spatial patterns of childcare services and identify areas with high concentration of childcare services.
Rescale data such that density values are computed as the \(number\ of\ childcare\ points\ per\ square\ kilometre\), for easier interpretation.
childcare17_sengkang_ppp_km <- rescale(childcare17_sengkang_ppp, 1000, 'km')
childcare17_hougang_ppp_km <- rescale(childcare17_hougang_ppp, 1000, 'km')
childcare17_bedok_ppp_km <- rescale(childcare17_bedok_ppp, 1000, 'km')
childcare17_bukitbatok_ppp_km <- rescale(childcare17_bukitbatok_ppp, 1000, 'km')
childcare20_sengkang_ppp_km <- rescale(childcare20_sengkang_ppp, 1000, 'km')
childcare20_hougang_ppp_km <- rescale(childcare20_hougang_ppp, 1000, 'km')
childcare20_bedok_ppp_km <- rescale(childcare20_bedok_ppp, 1000, 'km')
childcare20_bukitbatok_ppp_km <- rescale(childcare20_bukitbatok_ppp, 1000, 'km')
childcare17_sg_ppp_km <- rescale(childcare17_sg_ppp, 1000, 'km')
childcare20_sg_ppp_km <- rescale(childcare20_sg_ppp, 1000, 'km')
The function below will compute KDE and plot the kernel density map.
kde <- function(ppp, sigma, pln_area, year) {
kde_map <- density(ppp,
sigma = sigma,
edge = TRUE,
kernel = 'gaussian')
plot(kde_map,
main = sprintf('Childcare Services at %s in %s', pln_area, year),
box = FALSE)
}
kde(childcare17_sengkang_ppp_km, 0.105, 'Sengkang', '2017')
kde(childcare20_sengkang_ppp_km, 0.1, 'Sengkang', '2020')
kde(childcare17_hougang_ppp_km, 0.6, 'Hougang', '2017')
kde(childcare20_hougang_ppp_km, 0.688, 'Hougang', '2020')
bw.ppl(childcare17_bedok_ppp_km)
## sigma
## 0.4225241
kde(childcare17_bedok_ppp_km, bw.ppl, 'Bedok', '2017')
bw.ppl(childcare20_bedok_ppp_km)
## sigma
## 0.4684795
kde(childcare20_bedok_ppp_km, bw.ppl, 'Bedok', '2020')
kde(childcare17_bukitbatok_ppp_km, 0.4, 'Bukit Batok', '2017')
kde(childcare20_bukitbatok_ppp_km, 0.55, 'Bukit Batok', '2020')
Kernel density maps of childcare services in 2017 and 2020 across Singapore will be plotted.
kde_childcare17 <- density(childcare17_sg_ppp_km, sigma = bw.ppl, edge = TRUE, kernel = 'gaussian')
plot(kde_childcare17, main = 'Childcare Services in 2017', box = FALSE)
kde_childcare20 <- density(childcare20_sg_ppp_km, sigma = bw.ppl, edge = TRUE, kernel = 'gaussian')
plot(kde_childcare20, main = 'Childcare Services in 2020', box = FALSE)
The kernel density maps for Singapore will plotted on top of OpenStreetMap.
kde_childcare17_raster <- kde_childcare17 %>%
as.SpatialGridDataFrame.im() %>%
raster()
kde_childcare17_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 0.419757, 0.2695907 (x, y)
## extent : 2.667538, 56.39644, 15.74872, 50.25633 (xmin, xmax, ymin, ymax)
## crs : NA
## source : memory
## names : v
## values : -7.348703e-15, 32.11809 (min, max)
kde_childcare20_raster <- kde_childcare20 %>%
as.SpatialGridDataFrame.im() %>%
raster()
kde_childcare20_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 0.419757, 0.2695907 (x, y)
## extent : 2.667538, 56.39644, 15.74872, 50.25633 (xmin, xmax, ymin, ymax)
## crs : NA
## source : memory
## names : v
## values : -1.291346e-14, 29.18508 (min, max)
projection(kde_childcare17_raster) <- CRS('+init=EPSG:3414')
kde_childcare17_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 0.419757, 0.2695907 (x, y)
## extent : 2.667538, 56.39644, 15.74872, 50.25633 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## source : memory
## names : v
## values : -7.348703e-15, 32.11809 (min, max)
projection(kde_childcare20_raster) <- CRS('+init=EPSG:3414')
kde_childcare20_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 0.419757, 0.2695907 (x, y)
## extent : 2.667538, 56.39644, 15.74872, 50.25633 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## source : memory
## names : v
## values : -1.291346e-14, 29.18508 (min, max)
tmap_mode('view')
## tmap mode set to interactive viewing
tm_basemap('OpenStreetMap') +
tm_shape(kde_childcare17_raster) +
tm_raster('v') +
tm_layout(legend.position = c('right', 'bottom'),
frame = FALSE)
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable(s) "v" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
The use of kernel density maps provide several advantages over point maps.