1 Import necessary packages

This code chunk helps us to import all necessary packages and install it if it wasn’t installed previously.

packages = c('sf', 'tidyverse','rgdal', 'maptools', 'raster','spatstat','tidyselect', 'tmap','units','gridExtra','tmaptools','dplyr','plotly')
for (p in packages){
if(!require(p, character.only = T)){
  install.packages(p)
  }
  library(p,character.only = T)
}

2 Spatial data wragling

2.1 Read all geospatial file

Prepare necessary geospatial layer that is required for analysis. Childcare and kindergarten refers to the childcare service and kindergarten (spatialpoints) in Singapore. Subzone refer to the planning subzone (spatial polygon) in Singapore.

childcare <- st_read("data/geospatial/child-care-services-kml.kml")
kindergarten <- st_read("data/geospatial/kindergartens-kml.kml")
subzone <- st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL")

2.2 Check coordinate reference system for each geospatial spatial dataframe

The childcare spatial dataframe is CRS is WGS 84.

st_crs(childcare)
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

The kindergarten CRS is WGS 84.

st_crs(kindergarten)
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

The CRS for the subzone polygon are projected in SVY21

st_crs(subzone)
## Coordinate Reference System:
##   User input: SVY21 
##   wkt:
## PROJCRS["SVY21",
##     BASEGEOGCRS["SVY21[WGS84]",
##         DATUM["World Geodetic System 1984",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]],
##             ID["EPSG",6326]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["Degree",0.0174532925199433]]],
##     CONVERSION["unnamed",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",1.36666666666667,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",103.833333333333,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",1,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",28001.642,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",38744.572,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]]

2.3 Assign coordinnate system EPSG3414 to geospatial dataframe

To standardize the projected coordinate system, we will transform all the spatial to EPSG3414 which is the coordinate reference system for Singapore boundary. The code below are used to transform each of the spatial data frame to EPSG3414. We will then use st_crs() to ensure that it is being projected using the right coordinate reference system.

childcare3414 <- st_transform(childcare, 3414)
st_crs(childcare3414)
## Coordinate Reference System:
##   User input: EPSG:3414 
##   wkt:
## PROJCRS["SVY21 / Singapore TM",
##     BASEGEOGCRS["SVY21",
##         DATUM["SVY21",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4757]],
##     CONVERSION["Singapore Transverse Mercator",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",1.36666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",103.833333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",1,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",28001.642,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",38744.572,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["northing (N)",north,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["easting (E)",east,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["Singapore"],
##         BBOX[1.13,103.59,1.47,104.07]],
##     ID["EPSG",3414]]
kindergarten3414 <- st_transform(kindergarten, 3414)
st_crs(kindergarten3414)
## Coordinate Reference System:
##   User input: EPSG:3414 
##   wkt:
## PROJCRS["SVY21 / Singapore TM",
##     BASEGEOGCRS["SVY21",
##         DATUM["SVY21",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4757]],
##     CONVERSION["Singapore Transverse Mercator",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",1.36666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",103.833333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",1,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",28001.642,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",38744.572,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["northing (N)",north,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["easting (E)",east,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["Singapore"],
##         BBOX[1.13,103.59,1.47,104.07]],
##     ID["EPSG",3414]]
subzone3414 <- st_transform(subzone, 3414)
st_crs(subzone3414)
## Coordinate Reference System:
##   User input: EPSG:3414 
##   wkt:
## PROJCRS["SVY21 / Singapore TM",
##     BASEGEOGCRS["SVY21",
##         DATUM["SVY21",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4757]],
##     CONVERSION["Singapore Transverse Mercator",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",1.36666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",103.833333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",1,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",28001.642,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",38744.572,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["northing (N)",north,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["easting (E)",east,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["Singapore"],
##         BBOX[1.13,103.59,1.47,104.07]],
##     ID["EPSG",3414]]

3 Aspatial data wrangling

3.1 Import data

raw_data is saved as a variable for debugging purpose. Extract records that belongs to year 2018 (in 2020 they will be 2-6 years old) Group by planning area, subzone and age Sum up the population with respect to the groups Spread Age to column and fill it with its respective population value Ungroup the data and select the columns needed for analysis Convert planning area and subzone to upper case for join later

raw_data <- read_csv("data/aspatial/respopagesextod2011to2019.csv")
## Parsed with column specification:
## cols(
##   PA = col_character(),
##   SZ = col_character(),
##   AG = col_character(),
##   Sex = col_character(),
##   TOD = col_character(),
##   Pop = col_double(),
##   Time = col_double()
## )
res_data <-  raw_data %>%
  filter(Time == 2018) %>%
  group_by(PA,SZ,AG)%>%
  summarize(`POP`=sum(`Pop`))%>%
  spread(AG,POP)%>%
  ungroup()%>%
  dplyr::select(`PA`, `SZ`,`0_to_4`)%>%
  mutate_at(.vars = vars(PA, SZ), .funs = funs(toupper))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
glimpse(res_data)
## Observations: 323
## Variables: 3
## $ PA       <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG MO KIO...
## $ SZ       <chr> "ANG MO KIO TOWN CENTRE", "CHENG SAN", "CHONG BOON", ...
## $ `0_to_4` <dbl> 180, 1040, 890, 710, 200, 560, 250, 820, 0, 160, 0, 8...

3.2 Joining aspatial data with geospatial data

A left join is being used here to combine the 2 data frame. This is to make sure that the aspatial data is being mapped to their respective subzone on the map. Any data that is cannot be found in the subzone shapefile will be automatically remove from our analysis scope as there could be new subzone created since 2014.

subzone_res <- left_join(subzone3414, res_data, 
                              by = c("SUBZONE_N" = "SZ"))
## Warning: Column `SUBZONE_N`/`SZ` joining factor and character vector,
## coercing into character vector

4 EDA

4.1 Point in Polygon count

It is important to understand the supply of kindergarten and childcare service in each subzone. Hence, we will be using lengths and st_intersects to count the number of facilities in each subzone.

The 2 code chunk below count the number of kindergarten and childcare in each polygon and save it as a feature in the subzone_res data frame respectively.

subzone_res$`Kindergarten Count`<- lengths(st_intersects(subzone_res, kindergarten3414))
subzone_res$`Childcare Count`<- lengths(st_intersects(subzone_res, childcare3414))

4.2 Feature Engineering

Count polygon allow us to know the supply in each but it does not tell us if the supplies are being distributed across the subzone efficiently. Hence we will be performing some features engineering to get a better sense of the demand and supply of these facilities.

4.2.1 Calculate area for each subzone

First, we will get the area of each subzone and save it as a feature in the subzone_res data frame. set_units help us to convert area from m^2 to km^2. Although we can perform simple metrics conversion by dividing the area by 1,0000,000 to convert m^2 to km^2, it may be misleading since the unit show in the data frame is still in M^2.

subzone_res$Area <- subzone_res %>%
  st_area()%>%
  set_units(km^2)

4.2.2 Create new features for analysis

In the code chunk we will be creating the new variables that we will be using for choropleth analysis. Density features help us to determine if the number of facilities is distribute evenly based on land size. Children per facilities let us know if there are sufficient facilities catering to the subzone needs. Kindergarten Childcare Ratio help us to see if the kindergarten and childcare are proportionate across the various subzone.

subzone_res <- subzone_res %>%
  mutate(`Kindergarten Density` = as.numeric(`Kindergarten Count`/Area),
         `Childcare Density` = as.numeric(`Childcare Count`/Area),
         `Kindergarten Count` = na_if(`Kindergarten Count`,0),
         `Childcare Count` = na_if(`Childcare Count`,0),
         `Kindergarten Density` = na_if(`Kindergarten Density`,0),
         `Childcare Density` = na_if(`Childcare Density`,0),
         `Children per kindergarten` = as.numeric(`0_to_4`/`Kindergarten Count`),
         `Children per childcare` = as.numeric(`0_to_4`/`Childcare Count`),
         `Kindergarten Childcare Ratio` = (`Kindergarten Count`)/(`Childcare Count`),
         `0_to_4` = na_if(`0_to_4`,0))

4.3 Aspatial Analysis

In the usual data pipeline, this step should come before feature engineering however, we would like to analyze the new feature hence it is being place after feature engineering. Do note that this is an re-iterative process where new feature are added upon analysis.

This code remove the records with Kindergarten Count, childcare count or number of children that is zero in our data set as we do not want it to skew our analysis.

subzone_res_no0 <-subzone_res %>%
  filter(!is.na(`Kindergarten Count`) & !is.na(`Childcare Count`) & !is.na(`0_to_4`))

Check if the data is being process properly

glimpse(subzone_res_no0)
## Observations: 157
## Variables: 26
## $ OBJECTID                       <int> 2, 4, 5, 6, 7, 9, 15, 20, 24, 2...
## $ SUBZONE_NO                     <int> 1, 8, 3, 7, 9, 13, 1, 1, 1, 14,...
## $ SUBZONE_N                      <chr> "PEARL'S HILL", "HENDERSON HILL...
## $ SUBZONE_C                      <fct> OTSZ01, BMSZ08, BMSZ03, BMSZ07,...
## $ CA_IND                         <fct> Y, N, N, N, N, N, Y, N, N, N, N...
## $ PLN_AREA_N                     <fct> OUTRAM, BUKIT MERAH, BUKIT MERA...
## $ PLN_AREA_C                     <fct> OT, BM, BM, BM, BM, QT, SR, SI,...
## $ REGION_N                       <fct> CENTRAL REGION, CENTRAL REGION,...
## $ REGION_C                       <fct> CR, CR, CR, CR, CR, CR, CR, CR,...
## $ INC_CRC                        <fct> 8C7149B9EB32EEFC, 3775D82C5DDBE...
## $ FMEL_UPD_D                     <date> 2014-12-05, 2014-12-05, 2014-1...
## $ X_ADDR                         <dbl> 28679.06, 26782.83, 26201.96, 2...
## $ Y_ADDR                         <dbl> 29782.05, 29933.77, 30005.70, 2...
## $ SHAPE_Leng                     <dbl> 3506.107, 3313.625, 2825.594, 4...
## $ SHAPE_Area                     <dbl> 559816.2, 595428.9, 387429.4, 1...
## $ PA                             <chr> "OUTRAM", "BUKIT MERAH", "BUKIT...
## $ `0_to_4`                       <dbl> 230, 370, 590, 460, 580, 180, 2...
## $ geometry                       <MULTIPOLYGON [m]> MULTIPOLYGON (((29...
## $ `Kindergarten Count`           <int> 1, 1, 1, 2, 1, 5, 1, 2, 1, 1, 3...
## $ `Childcare Count`              <int> 4, 3, 1, 8, 3, 2, 5, 1, 4, 2, 6...
## $ Area                           [km^2] 0.5598162 [km^2], 0.5954289 [k...
## $ `Kindergarten Density`         <dbl> 1.7863004, 1.6794617, 2.5811152...
## $ `Childcare Density`            <dbl> 7.1452017, 5.0383851, 2.5811152...
## $ `Children per kindergarten`    <dbl> 230.0000, 370.0000, 590.0000, 2...
## $ `Children per childcare`       <dbl> 57.500000, 123.333333, 590.0000...
## $ `Kindergarten Childcare Ratio` <dbl> 0.2500000, 0.3333333, 1.0000000...

4.3.1 Descriptive analytics

This code generate the summary statistics for each of the features that we are interested in analyzing. This is a good way to quickly check the interquartile range together with mean.

summary(subzone_res_no0$`0_to_4`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      30     370     730    1092    1400    5750
summary(subzone_res_no0$`Kindergarten Count`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   2.815   3.000  17.000
summary(subzone_res_no0$`Childcare Count`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   3.000   7.000   8.357  11.000  42.000
summary(subzone_res_no0$`Area`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2768  0.7761  1.2257  1.4381  1.7945  4.9191
summary(subzone_res_no0$`Kindergarten Density`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2947  1.1179  1.9847  2.2199  2.9630 10.1947
summary(subzone_res_no0$`Childcare Density`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2033  3.5086  6.3409  6.5788  8.6999 20.7806
summary(subzone_res_no0$`Children per kindergarten`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    14.0   214.3   340.0   503.0   570.0  5750.0
summary(subzone_res_no0$`Children per childcare`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.333  86.000 118.809 137.124 165.000 590.000

4.3.2 Scatter plot

Scatter plot allow us to see if there is a relationship between the number of kindergarten and childcare services. From the plot, it seems that they have a direct relationship where the more kindergarten, the more childcare service there is in a planning area.

scatter <- ggplot(data=subzone_res_no0, 
       aes(y = `Childcare Count`, x= `Kindergarten Count`))+
  geom_point(color="black", fill="light blue")+
  geom_smooth(method=lm)
ggplotly(scatter)

4.3.3 Box plot comparing density

Box plot can help us to visualize the variance and distribution of the children per facility feature. From the chart it seems that the outlier for kindergarten is way higher compared to childcare. However, when we exclude the outlier, the distribution seem to be a little different

ggplotly(kindergarten_boxplot <- ggplot(data=subzone_res_no0, 
       aes(x= , y = `Children per kindergarten` ))+
  geom_boxplot())
kindergarten_boxplot <- ggplot(data=subzone_res_no0, 
       aes(x= , y = `Children per kindergarten` ))+
  geom_boxplot()
childcare_boxplot <- ggplot(data=subzone_res_no0, 
       aes(x= , y= `Children per childcare`))+
  geom_boxplot()
grid.arrange(kindergarten_boxplot,childcare_boxplot,ncol = 2)

Use ggplotly to get the upper fence value

ggplotly(kindergarten_boxplot)

4.4 Geo-spatial Analysis

4.4.1 Choropleth Maps

This is the choropleth map that show the number children aged 0 to 4 (2 to 6 years in 2020)across each subzone. We can see that the children population are not scattered evenly across the subzone. In fact, it is not co-related with area since places with most children are doesn’t come from a large polygon.

This is an interactive plot where you can click on a particular subzone and look at the details. Color fill is based on children population to understand the demand for the facilities

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(subzone_res)+
  tm_fill("0_to_4",
          id="SUBZONE_N",
          popup.vars=c("Population"="0_to_4",
                       "Num of Kindergarten"="Kindergarten Count",
                       "Num of Childcare"="Childcare Count",
                       "Area" = "Area"))+
  tm_layout(main.title = "Distribution of Demand by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

This is the code where the chloropleth map for the number of kindergarten in each subzone is save to the kindergarten_count variable.

kindergarten_count <- tm_shape(subzone_res)+
  tm_fill("Kindergarten Count",style = "fixed",breaks = c(1, 11, 21, 31, 41))+
  tm_layout(main.title = "Distribution of Kindergarten count by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

This is the code where the chloropleth map for the number of childcare in each subzone is save to the childcare_count variable.

childcare_count <- tm_shape(subzone_res)+
  tm_fill("Childcare Count", style="pretty")+
  tm_layout(main.title = "Distribution of childcare count by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

kindergarten count vs childcare count Plot the childcare and kindergarten count choropleth map side by side for comparison purpose. This plot is the comparison of the supply of both facilties. As we try to create similar breakpoint for both maps, the kindergarten plot diesn’t seem very useful since there are significantly lesser kindergarten than childcare in each subzone. However, we can deduce subzone with the highest childcare count isn’t the one with the largest area. Hence, there should be some form of clustering involved.

tmap_mode("plot")
## tmap mode set to plotting
tmap_arrange(kindergarten_count,childcare_count)

This is the code where the chloropleth map for the number of children per childcare in each subzone is save to the kindergarten_per_childcare variable.

children_per_kindergarten <- tm_shape(subzone_res)+
  tm_fill("Children per kindergarten", style = "fixed",breaks = c(0, 200, 400, 600, 800, 1000,6000)
          )+
  tm_layout(main.title = "Distribution of population-kindergarten ratio by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

This is the code where the chloropleth map for the number of children per childcare in each subzone is save to the children_per_childcare variable.

children_per_childcare <- tm_shape(subzone_res)+
  tm_fill("Children per childcare")+
  tm_layout(main.title = "Distribution of population-childcare ratio by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

kindergarten vs childcare population ratio Plot the childcare and kindergarten population ratio choropleth map side by side for comparison purpose. This plot show us if there is sufficient facilities in each subzone based on the demand. From this chart we can see if the demand of kindergarten is being met. Do note that the break point for this plot take into consideration of the boxplot in aspatial analysis where we group everything more than the upper fence (1080 approximate to 1000) as another category by itself

tmap_mode("plot")
## tmap mode set to plotting
tmap_arrange(children_per_kindergarten,children_per_childcare)

kindergarten-childcare ratio

This is the choropleth map for kindergarten childcare ratio. This choropleth map can show us the relationship between kindergarten and childcare service if the result is less than 1 it means that there is more kindergarten than childcare and vice versa. As you can see from this there are a few subzone with a high kindergarten to childcare ratio. This may indicate a lack of childcare in these area. In general, there are lesser kindergarten compared to childcare. However, a low ratio might just suggest that there is too much childcare or a lack of both facilities.

tm_shape(subzone_res)+
  tm_fill("Kindergarten Childcare Ratio")+
  tm_layout(main.title = "Distribution of Kindergarten Childcare Ratio by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

This is the code where the chloropleth map for kindergarten density across each subzone is save to the kindergarten_density variable.

kindergarten_density <- tm_shape(subzone_res)+
  tm_fill("Kindergarten Density")+
  tm_layout(main.title = "Distribution of Kindergarten Density by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

This is the code where the chloropleth map for childcare density across each subzone is save to the childcare_density variable.

childcare_density <- tm_shape(subzone_res)+
  tm_fill("Childcare Density")+
  tm_layout(main.title = "Distribution of Childcare Density by planning subzone",
            main.title.position = "center",
            main.title.size = 1,
            legend.height = 0.45, 
            legend.width = 0.35,
            legend.outside = FALSE,
            legend.position = c("right", "bottom"),
            frame = FALSE)+
  tm_borders(lwd = 0.8,  alpha = 1)

Kindergarten vs childcare density plot Plot the childcare and kindergarten density choropleth map side by side for comparison purpose. Some of the spot is missing as those subzone do not have any kindergarten or childcare. In general, we can see that the density for kindergarten and childcare is quite different.

tmap_arrange(kindergarten_density,childcare_density)

4.5 Geocommunication

According to ECDA, kindergarten are for children between 4 to below 7 years(excluding pre school) and childcare is catered for children between 18 months to below 7 years. Thus, the population that is targeted for analysis is 2-6 years old children in 2020. That come with an assumption that most people do not relocate their within this period of time.

Among all the choropleth maps, different feature is being distributed in different ways.According to the population choropleth map where we see the absolute number of children 2 to below 7 years. Certain subzone such as tampines east and mathilda many children while some have very little. However, we should not just exclude them from them from analysis as the may affect the complete spatial randomness. There are some choropleth map that does explain much such as the density map. However, it does give us a sensing if the features are being distributed fairly across the map. Overall, it can help us to scope which are the planning area that is worthy to perform a more in depth analysis.

Although the scatter plot might not have a very strong relationship, we can see the number of kindergarten is directly proportionate to the number of childcare service. With the summary statistics and box plot we can see that there a quite a few subzone with especially high number of kindergarten and childcare services.

5 Preparing Spatial data for futher analysis

To perform more advance analysis such as Kernel Density Estimation function, we need to convert the Spatial Data Frame into spatial object first and then to spatstat’s ppp format

5.1 Convert sf object to spatial object

Convert kindergarten3414 and childcare3414 sf dataframe to sp object.

kindergarten_sp <- as_Spatial(kindergarten3414$geometry)
childcare_sp <- as_Spatial(childcare3414$geometry)

Compare

par(mfrow=c(1,2))
plot(kindergarten_sp, main = "Kindergarten Points")
plot(childcare_sp,main = "Childcare Points")

Convert subzone3414 sf dataframe to sp object.

subzone_sp <- as_Spatial(subzone3414)
plot(subzone_sp, main = "Subzone Polygon")

5.2 Convert SP object to ppp

Transform the childcare sp object to ppp object after transforming the CRS to 3414

childcare_ppp <- as(spTransform(childcare_sp,CRS("+init=epsg:3414")), "ppp")
## Warning in proj4string(x): CRS object has comment, which is lost in output

## Warning in proj4string(x): CRS object has comment, which is lost in output
plot(childcare_ppp)

Transform the Kindergarten sp object to ppp object after transforming the CRS to 3414

kindergarten_ppp <- as(spTransform(kindergarten_sp,CRS("+init=epsg:3414")), "ppp")
## Warning in proj4string(x): CRS object has comment, which is lost in output

## Warning in proj4string(x): CRS object has comment, which is lost in output
plot(kindergarten_ppp)

5.3 Creating Owin object for subzone polygon

subzone_owin <- as(subzone_sp, "owin")
plot(subzone_owin)

5.4 Handling duplicated points

Check if there are any duplicate point in childcare and kindergarten ppp object

print(any(duplicated(childcare_ppp)))
## [1] TRUE
print(any(duplicated(kindergarten_ppp)))
## [1] TRUE

Count how many location have more than one point event

sum(multiplicity(childcare_ppp) > 1)
## [1] 128
sum(multiplicity(kindergarten_ppp) > 1)
## [1] 100

5.5 Jitter spatial points

Since the number of duplicate is a significant amount in the data set, jitter would be a better way to go around it

childcare_ppp_jit <- rjitter(childcare_ppp, retry=TRUE, nsim=1, drop=TRUE)
plot(childcare_ppp_jit)

kindergarten_ppp_jit <- rjitter(kindergarten_ppp, retry=TRUE, nsim=1, drop=TRUE)
plot(kindergarten_ppp_jit)

This code is to make sure that after jitter there a no duplicate in the ppp object.

print(any(duplicated(childcare_ppp_jit)))
## [1] FALSE
print(any(duplicated(kindergarten_ppp_jit)))
## [1] FALSE

5.6 Combine facilities point with subzone polygon

par(mfrow=c(1,2))
kindergartenSG_ppp = kindergarten_ppp_jit[subzone_owin]
childcareSG_ppp = childcare_ppp_jit[subzone_owin]
plot(kindergartenSG_ppp)
plot(childcareSG_ppp)

As you can see from the 2 plots above, it seems that kindergarten is more sparse compared to kindergarten. It is a given since there is a total of 1545 childcare and only 448 kindergarten. For both plot, they seem to be spread across the same area. However, the denser part on the childcare plot seems to be a little different from the kindergarten. This suggest that the location of childcare service might not be only dependent on the location of kindergarten.

5.7 combine facilities with 4 subzone

As L function is very resource intensive, we will be performing it based on 4 subzone: Sengkang, Bedok, Bukit Batok and Hougang. The code chunk below will help us to generate the ppp with respect to the following subzone

5.7.1 Extract polygon of study area

par(mfrow=c(2,2))
sk <- subzone3414[subzone3414$PLN_AREA_N == "SENGKANG",]
bedok <- subzone3414[subzone3414$PLN_AREA_N == "BEDOK",]
bb <- subzone3414[subzone3414$PLN_AREA_N == "BUKIT BATOK",]
hg <- subzone3414[subzone3414$PLN_AREA_N == "HOUGANG",]

5.7.2 Convert them to Spatial Objects

sk <- as_Spatial(sk)
bedok <- as_Spatial(bedok)
bb <- as_Spatial(bb)
hg <- as_Spatial(hg)

5.7.3 Create Owin object

Create Owin object for each of the planning area and plot it

par(mfrow=c(2,2))
sk_owin <- as(sk, "owin")
bedok_owin <- as(bedok, "owin")
bb_owin <- as(bb, "owin")
hg_owin <- as(hg, "owin")
plot(sk_owin)
plot(bedok_owin)
plot(bb_owin)
plot(hg_owin)

5.7.4 Combing facilities with each planning area

After combining the facilities with their respective planning area, we can see certain pattern in each of the point maps. First we compare the 4 planning area for kindergarten and childcare. it seems that the kindergarten is more sparse compared with childcare. Unlike the map for Singapore as a whole, it seems that the facilities are randomly distributed. However, there are still some form of clustering that exist in the map which is hard to tell since the point intersect each other.

par(mfrow=c(2,2))
kindergartensk_ppp = kindergarten_ppp_jit[sk_owin]
kindergartenbedok_ppp = kindergarten_ppp_jit[bedok_owin]
kindergartenbb_ppp = kindergarten_ppp_jit[bb_owin]
kindergartenhg_ppp = kindergarten_ppp_jit[hg_owin]
plot(kindergartensk_ppp)
plot(kindergartenbedok_ppp)
plot(kindergartenbb_ppp)
plot(kindergartenhg_ppp)

par(mfrow=c(2,2))
childcaresk_ppp = childcare_ppp_jit[sk_owin]
childcarebedok_ppp = childcare_ppp_jit[bedok_owin]
childcarebb_ppp = childcare_ppp_jit[bb_owin]
childcarehg_ppp = childcare_ppp_jit[hg_owin]
plot(childcaresk_ppp)
plot(childcarebedok_ppp)
plot(childcarebb_ppp)
plot(childcarehg_ppp)

6 Chi square analysis for quadrat test

Confidence level of 99% therefore my alpha would be 0.01 and the number of simulation run should be 2/0.01 = 200 H0 <- The distribution of kindergarten in Singapore are randomly distributed H1 <- The distribution of kindergarten in Singapore are not randomly distributed This is the code to perform quadrat test with monte carlo simulation for the kindergarten

kindergarten_qt <- quadrat.test(kindergartenSG_ppp, 
             nx = 20, ny = 15,
             method="M",
             nsim=199)

This is the code to perform quadrat test with monte carlo simulation for the childcare

childcare_qt<- quadrat.test(childcareSG_ppp, 
             nx = 20, ny = 15,
             method="M",
             nsim=199)

6.1 Plotting the quadrat count

As you can see how the map is being split into grids, there is some form of cluster pattern for both kindergarten and childcare. With that being say it is still difficult to visual how concentrated the cluster is.

plot(kindergartenSG_ppp)
plot(kindergarten_qt, add = TRUE, cex =.1)

plot(childcareSG_ppp)
plot(childcare_qt, add = TRUE, cex =.1)

7 Nearest Neighbour Analysis

Confidence level of 99% therefore my alpha would be 0.01 and the number of simulation run should be 2/0.01 = 200

For both the clark evans test for kindergarten and childcare, we get an r value that is lesser than 1. This suggests that both kindergarten and childcare have an underlying cluster pattern within them.

clarkevans.test(kindergartenSG_ppp,
                correction="none",
                clipregion="subzone_owin",
                alternative=c("two.sided"),
                nsim=199)
## 
##  Clark-Evans test
##  No edge correction
##  Monte Carlo test based on 199 simulations of CSR with fixed n
## 
## data:  kindergartenSG_ppp
## R = 0.50106, p-value = 0.01
## alternative hypothesis: two-sided

There is a clustered pattern towards the spatial pattern for childcare service in Singapore

clarkevans.test(childcareSG_ppp,
                correction="none",
                clipregion="subzone_owin",
                alternative=c("two.sided"),
                nsim=99)
## 
##  Clark-Evans test
##  No edge correction
##  Monte Carlo test based on 99 simulations of CSR with fixed n
## 
## data:  childcareSG_ppp
## R = 0.5455, p-value = 0.02
## alternative hypothesis: two-sided

8 2nd Order Analysis

2nd order properties can help us to determine if the facilities appears to be clustered, random or regularly spaced. This analysis is useful to provide insight on the point pattern as well as derive a good bandwidth for the Kernel Density Estimation (KDE).

Due to the limited computational power, 99% confidence interval will used to limit the number of simulations needed for the monte carlo simuation.

For all 2nd order analysis, we will standardize the hypothesis as: H0: Distribution of facilities are randomly distributed H1: Distribution of facilities are not randomly distributed

8.1 G Function

G function measures the distribution of distance from an arbitrary event to its nearest neighbor. Clustered pattern have their observed location nearer to each other while regular have higher nearest neighbor distance compared to complete spatial randomness.

8.1.1 Kindergarten

Compute the G function for kindergarten and visualize the distribution.

G_kindergarten = Gest(kindergartenSG_ppp, correction = "border")
plot(G_kindergarten)

We will use Monte Carlo test to test for complete spatial randomness The hypothesis is as follows: H0: Distribution of kindergarten are randomly distributed H1: Distribution of kindergarten are not randomly distributed A 2 tailed test with confidence interval of 99% will be perform. Hence the number of simulation needed will be 2/0.01 = 199 simulations

G_kindergarten.csr <- envelope(kindergartenSG_ppp, Gest, nsim = 199)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
plot(G_kindergarten.csr)

8.1.2 Childcare

Compute the G function for childcare and visualize the distribution.

G_childcare = Gest(childcareSG_ppp, correction = "border")
plot(G_childcare)

We will use Monte Carlo test to test for complete spatial randomness The hypothesis is as follows: H0: Distribution of childcare are randomly distributed H1: Distribution of childcare are not randomly distributed A 2 tailed test with confidence interval of 99% will be perform. Hence the number of simulation needed will be 2/0.01 = 199 simulations

G_childcare.csr <- envelope(childcareSG_ppp, Gest, nsim = 199)
## Generating 199 simulations of CSR  ...
## 1, 2,  [etd 3:37] 3, 4 [etd 3:34] .6 [etd 3:35] .
## 8 [etd 3:32] .10 [etd 3:29] .12 [etd 3:27] .14
##  [etd 3:25] .16 [etd 3:23] .18 [etd 3:20] .20 [etd 3:20] .
## 22 [etd 3:17] .24 [etd 3:15] .26 [etd 3:13] .28
##  [etd 3:10] .30 [etd 3:08] .32 [etd 3:05] .34 [etd 3:03] .
## 36 [etd 3:02] .38 [etd 2:59] .40 [etd 2:58] .42
##  [etd 2:55] .44 [etd 2:53] .46 [etd 2:50] .48 [etd 2:48] .
## 50 [etd 2:46] .52 [etd 2:43] .54 [etd 2:41] .56
##  [etd 2:39] .58 [etd 2:37] .60 [etd 2:35] .62 [etd 2:32] .
## 64 [etd 2:30] .66 [etd 2:28] .68 [etd 2:25] .70
##  [etd 2:23] .72 [etd 2:21] .74 [etd 2:19] .76 [etd 2:17] .
## 78 [etd 2:15] .80 [etd 2:12] .82 [etd 2:10] .84
##  [etd 2:08] .86 [etd 2:05] .88 [etd 2:03] .90 [etd 2:01] .
## 92 [etd 1:59] .94 [etd 1:57] .96 [etd 1:55] .98
##  [etd 1:52] .100 [etd 1:50] .102 [etd 1:48] .104 [etd 1:45] .
## 106 [etd 1:43] .108 [etd 1:41] .110 [etd 1:39] .112
##  [etd 1:37] .114 [etd 1:34] .116 [etd 1:32] .118 [etd 1:30] .
## 120 [etd 1:28] .122 [etd 1:26] .124 [etd 1:23] .126
##  [etd 1:21] .128 [etd 1:19] .130 [etd 1:17] .132 [etd 1:15] .
## 134 [etd 1:12] .136 [etd 1:10] .138 [etd 1:08] .140
##  [etd 1:06] .142 [etd 1:03] .144 [etd 1:01] .146 [etd 59 sec] .
## 148 [etd 57 sec] .150 [etd 55 sec] .152 [etd 53 sec] .154
##  [etd 50 sec] .156 [etd 48 sec] .158 [etd 46 sec] .160 [etd 44 sec] .
## 162 [etd 41 sec] .164 [etd 39 sec] .166 [etd 37 sec] .168
##  [etd 35 sec] .170 [etd 32 sec] .172 [etd 30 sec] .174 [etd 28 sec] .
## 176 [etd 26 sec] .178 [etd 23 sec] .180 [etd 21 sec] .182
##  [etd 19 sec] .184 [etd 17 sec] .186 [etd 15 sec] .188 [etd 12 sec] .
## 190 [etd 10 sec] .192 [etd 8 sec] .194 [etd 6 sec] .196
##  [etd 3 sec] .198 [etd 1 sec]  199.
## 
## Done.
plot(G_childcare.csr)

8.1.3 G function Conclusion

As you can see from the G function of kindergarten and childcare in Singapore, both empirical line is above the envelope. This suggest that both the facilities followed a cluster pattern when we are observing in Singapore as a whole.

8.2 L Function

L function is the transformed version of K function. Instead of looking of neighbour based on density (G function calculate number of event in the ring), it is calculated based on distance of the nearest neighbor. However, the method is very computational intensive so we can only afford to perform it at subzone level.

8.2.1 Kindergarten

Compute the L function for kindergarten for each planning area and visualize the distribution.

par(mfrow=c(2,2))
L_kindergarten_sk = Lest(kindergartensk_ppp, correction = "Ripley")
L_kindergarten_bedok = Lest(kindergartenbedok_ppp, correction = "Ripley")
L_kindergarten_bb = Lest(kindergartenbb_ppp, correction = "Ripley")
L_kindergarten_hg = Lest(kindergartenhg_ppp, correction = "Ripley")
plot(L_kindergarten_sk, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")
plot(L_kindergarten_bedok, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")
plot(L_kindergarten_bb, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")
plot(L_kindergarten_hg, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")

We will use Monte Carlo test to test for complete spatial randomness The hypothesis is as follows: H0: Distribution of kindergarten are randomly distributed H1: Distribution of kindergarten are not randomly distributed A 2 tailed test with confidence interval of 99% will be perform. Hence the number of simulation needed will be 2/0.01 = 199 simulations

L_kindergarten_sk.csr <- envelope(kindergartensk_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
L_kindergarten_bedok.csr <- envelope(kindergartenbedok_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
L_kindergarten_bb.csr <- envelope(kindergartenbb_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
L_kindergarten_hg.csr <- envelope(kindergartenhg_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
par(mfrow=c(2,2))
plot(L_kindergarten_sk.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
plot(L_kindergarten_bedok.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
plot(L_kindergarten_bb.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
plot(L_kindergarten_hg.csr, . - r ~ r, xlab="d", ylab="L(d)-r")

8.2.2 Childcare

Compute the L function for childcare and visualize the distribution.

par(mfrow=c(2,2))
L_childcare_sk = Lest(childcaresk_ppp, correction = "Ripley")
L_childcare_bedok = Lest(childcarebedok_ppp, correction = "Ripley")
L_childcare_bb = Lest(childcarebb_ppp, correction = "Ripley")
L_childcare_hg = Lest(childcarehg_ppp, correction = "Ripley")
plot(L_childcare_sk, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")
plot(L_childcare_bedok, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")
plot(L_childcare_bb, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")
plot(L_childcare_hg, . -r ~ r, 
     ylab= "L(d)-r", xlab = "d(m)")

We will use Monte Carlo test to test for complete spatial randomness The hypothesis is as follows: H0: Distribution of childcare are randomly distributed H1: Distribution of childcare are not randomly distributed A 2 tailed test with confidence interval of 99% will be perform. Hence the number of simulation needed will be 2/0.01 = 199 simulations

L_childcare_sk.csr <- envelope(childcaresk_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
L_childcare_bedok.csr <- envelope(childcarebedok_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
L_childcare_bb.csr <- envelope(childcarebb_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
L_childcare_hg.csr <- envelope(childcarehg_ppp, Lest, nsim = 199, rank = 1, glocal=TRUE)
## Generating 199 simulations of CSR  ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34.36.38.
## 40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74.76.78
## .80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114.116.
## 118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154.156
## .158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194.
## 196.198 199.
## 
## Done.
par(mfrow=c(2,2))
plot(L_childcare_sk.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
plot(L_childcare_bedok.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
plot(L_childcare_bb.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
plot(L_childcare_hg.csr, . - r ~ r, xlab="d", ylab="L(d)-r")

8.2.3 L function conclusion

Unlike the result from g function, L function suggest that there is some form of spatial randomness when we are anaysing the point based on subzone. Among the 4 subzone, Seng Kang seem to be the only one that still have some form of spatial clustering pattern despite it being weak. The rest of the subzone for both facilities have their empirical line within the envelope. This suggest that even though we look at singapore as a whole, there might be cluster but when we zoom in to individual subzone, the location are randomly distributed.

9 Kernel Density Estimation (KDE)

KDE is used for 1st order analysis where we analyze the density value based on number of points per square. However, in this case, the number of points is not a discrete value since we are using guassian distribution for the KDE.

9.1 Covert the unit of measurement from meter to kilometer

If we were to use meter square the values in the legend of the KDE plot would be very small. Hence we will convert is to km square instead.

childcareSG_ppp.km <- rescale(childcareSG_ppp, 1000, "km")
kindergartenSG_ppp.km <- rescale(kindergartenSG_ppp, 1000, "km")

9.2 Compute Kernel density estimation

Compute the KDE of Kindergarten in Singapore. bw.ppl is used to generate the bandwidth for KDE as when we are looking at the point analysis, there seems to be a few tight cluster in the Singapore map.

kde_kindergartenSG_bw <- density(kindergartenSG_ppp.km, sigma=bw.ppl, edge=TRUE, kernel="gaussian")
plot(kde_kindergartenSG_bw)

Compute the KDE of Childcare in Singapore

kde_childcareSG_bw <- density(childcareSG_ppp.km, sigma=bw.ppl, edge=TRUE, kernel="gaussian")
plot(kde_childcareSG_bw)

9.3 Converting KDE output into grid object

Convert KDE output into grid object for both kindergarten and childcare so we can convert it into raster format.

gridded_kde_kindergarten_bw <- as.SpatialGridDataFrame.im(kde_kindergartenSG_bw)
spplot(gridded_kde_kindergarten_bw)

gridded_kde_childcareSG_bw <- as.SpatialGridDataFrame.im(kde_childcareSG_bw)
spplot(gridded_kde_childcareSG_bw)

9.4 Converting gridded output into raster

Convert the gridded output into raster so we can plot it with Open street map. Set the projection to EPSG 3414, datum to WGS 84 and units to km. Check if the crs matches what we have specified

kde_childcareSG_bw_raster <- raster(gridded_kde_childcareSG_bw)
projection(kde_childcareSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
crs(kde_childcareSG_bw_raster)
## CRS arguments:
##  +init=EPSG:3414 +datum=WGS84 +units=km +proj=tmerc
## +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1
## +x_0=28001.642 +y_0=38744.572 +no_defs +ellps=WGS84 +towgs84=0,0,0
kde_kindergartenSG_bw_raster <- raster(gridded_kde_kindergarten_bw)
projection(kde_kindergartenSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
crs(kde_kindergartenSG_bw_raster)
## CRS arguments:
##  +init=EPSG:3414 +datum=WGS84 +units=km +proj=tmerc
## +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1
## +x_0=28001.642 +y_0=38744.572 +no_defs +ellps=WGS84 +towgs84=0,0,0

9.5 Plotting KDE on Open Street Map

Set the base map to be OpenStreetMap Specify the chart elements such as legend and title. Add the KDE layer with a alpha of 0.8 so we can see the detail on Open Street Map Add in the subzone polygon to help us to visualise the KDE with respect to subzone.

tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap")+
  tm_layout(legend.outside = TRUE, title="Childcare KDE with OpenStreetMap")+
  tm_shape(kde_kindergartenSG_bw_raster)+
  tm_raster("v", alpha=0.8)+
  tm_shape(subzone3414)+
  tm_borders(lwd = 1, alpha = 0.5)
## Variable(s) "v" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap")+
  tm_layout(legend.outside = TRUE, title="Childcare KDE with OpenStreetMap")+
  tm_shape(kde_childcareSG_bw_raster)+
  tm_raster("v", alpha=0.8)+
  tm_shape(subzone3414)+
  tm_borders(lwd = 1, alpha = 0.5)
## Variable(s) "v" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

9.6 Advantages of KDE

There are various KDE and point plot that is created above. I have pick the one that can show us the advantages of each map.

For point map: 1. It is relatively easy to plot 2. We can see the individual point even if there is no cluster

For KDE: 1. We can visualize if a particular area have access to many of the facilities or little, but in point we might can only see the number of point instead of its accessibility. 2. we can use different kernel to do the estimation. For example, we can image gaussian distribution as walking distance where the point awarded is not just 1 or 0 but a continuous value 3. KDE allow us to customize our definition of accessibility. By determining the bandwidth we can numerical classify which are the area that have higher accessibility.

par(mfrow=c(1,2))
plot(childcareSG_ppp)
plot(kde_childcareSG_bw)

10 Conclusion

It is interesting to know that spatial randomness can varies greatly when the context of the map change from the entire country to a particular subzone. This may be a sign for us to use clipping method in the future where we exclude polygon that have very little children. For example, industrial area or water catchment area.

Overall we can say that there is some sort of clustering pattern for each of the facilities on a country level but not on a subzone level. This is good sign as it helps to improve accessibility of the facilities to everyone instead of a concentrated area but at the same time we do not waste resources to build these facilities where there are no users.