packages = c('rgdal', 'spdep', 'ClustGeo', 'tmap', 'sf', 'ggpubr', 'cluster', 'heatmaply', 'corrplot', 'psych', 'tidyverse','GISTools','factoextra')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
Importing geospatial data into R environment and named it as sg_sf.
sg_sf <- st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL")
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `/Users/mel/Desktop/take_home_exe3/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
sg_sf <- st_set_crs(sg_sf, 3414)
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
sg_sf
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## epsg (SRID): 3414
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## First 10 features:
## OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND PLN_AREA_N
## 1 1 1 MARINA SOUTH MSSZ01 Y MARINA SOUTH
## 2 2 1 PEARL'S HILL OTSZ01 Y OUTRAM
## 3 3 3 BOAT QUAY SRSZ03 Y SINGAPORE RIVER
## 4 4 8 HENDERSON HILL BMSZ08 N BUKIT MERAH
## 5 5 3 REDHILL BMSZ03 N BUKIT MERAH
## 6 6 7 ALEXANDRA HILL BMSZ07 N BUKIT MERAH
## 7 7 9 BUKIT HO SWEE BMSZ09 N BUKIT MERAH
## 8 8 2 CLARKE QUAY SRSZ02 Y SINGAPORE RIVER
## 9 9 13 PASIR PANJANG 1 QTSZ13 N QUEENSTOWN
## 10 10 7 QUEENSWAY QTSZ07 N QUEENSTOWN
## PLN_AREA_C REGION_N REGION_C INC_CRC FMEL_UPD_D X_ADDR
## 1 MS CENTRAL REGION CR 5ED7EB253F99252E 2014-12-05 31595.84
## 2 OT CENTRAL REGION CR 8C7149B9EB32EEFC 2014-12-05 28679.06
## 3 SR CENTRAL REGION CR C35FEFF02B13E0E5 2014-12-05 29654.96
## 4 BM CENTRAL REGION CR 3775D82C5DDBEFBD 2014-12-05 26782.83
## 5 BM CENTRAL REGION CR 85D9ABEF0A40678F 2014-12-05 26201.96
## 6 BM CENTRAL REGION CR 9D286521EF5E3B59 2014-12-05 25358.82
## 7 BM CENTRAL REGION CR 7839A8577144EFE2 2014-12-05 27680.06
## 8 SR CENTRAL REGION CR 48661DC0FBA09F7A 2014-12-05 29253.21
## 9 QT CENTRAL REGION CR 1F721290C421BFAB 2014-12-05 22077.34
## 10 QT CENTRAL REGION CR 3580D2AFFBEE914C 2014-12-05 24168.31
## Y_ADDR SHAPE_Leng SHAPE_Area geometry
## 1 29220.19 5267.381 1630379.3 MULTIPOLYGON (((31495.56 30...
## 2 29782.05 3506.107 559816.2 MULTIPOLYGON (((29092.28 30...
## 3 29974.66 1740.926 160807.5 MULTIPOLYGON (((29932.33 29...
## 4 29933.77 3313.625 595428.9 MULTIPOLYGON (((27131.28 30...
## 5 30005.70 2825.594 387429.4 MULTIPOLYGON (((26451.03 30...
## 6 29991.38 4428.913 1030378.8 MULTIPOLYGON (((25899.7 297...
## 7 30230.86 3275.312 551732.0 MULTIPOLYGON (((27746.95 30...
## 8 30222.86 2208.619 290184.7 MULTIPOLYGON (((29351.26 29...
## 9 29893.78 6571.323 1084792.3 MULTIPOLYGON (((20996.49 30...
## 10 30104.18 3454.239 631644.3 MULTIPOLYGON (((24472.11 29...
sg_sp <- as_Spatial(sg_sf)
glimpse(sg_sf)
## Observations: 323
## Variables: 16
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ SUBZONE_NO <int> 1, 1, 3, 8, 3, 7, 9, 2, 13, 7, 12, 6, 1, 5, 1, 1, 3, 2, 2,…
## $ SUBZONE_N <fct> MARINA SOUTH, PEARL'S HILL, BOAT QUAY, HENDERSON HILL, RED…
## $ SUBZONE_C <fct> MSSZ01, OTSZ01, SRSZ03, BMSZ08, BMSZ03, BMSZ07, BMSZ09, SR…
## $ CA_IND <fct> Y, Y, Y, N, N, N, N, Y, N, N, N, N, Y, Y, Y, N, N, N, N, N…
## $ PLN_AREA_N <fct> MARINA SOUTH, OUTRAM, SINGAPORE RIVER, BUKIT MERAH, BUKIT …
## $ PLN_AREA_C <fct> MS, OT, SR, BM, BM, BM, BM, SR, QT, QT, QT, BM, ME, RV, SR…
## $ REGION_N <fct> CENTRAL REGION, CENTRAL REGION, CENTRAL REGION, CENTRAL RE…
## $ REGION_C <fct> CR, CR, CR, CR, CR, CR, CR, CR, CR, CR, CR, CR, CR, CR, CR…
## $ INC_CRC <fct> 5ED7EB253F99252E, 8C7149B9EB32EEFC, C35FEFF02B13E0E5, 3775…
## $ FMEL_UPD_D <date> 2014-12-05, 2014-12-05, 2014-12-05, 2014-12-05, 2014-12-0…
## $ X_ADDR <dbl> 31595.84, 28679.06, 29654.96, 26782.83, 26201.96, 25358.82…
## $ Y_ADDR <dbl> 29220.19, 29782.05, 29974.66, 29933.77, 30005.70, 29991.38…
## $ SHAPE_Leng <dbl> 5267.381, 3506.107, 1740.926, 3313.625, 2825.594, 4428.913…
## $ SHAPE_Area <dbl> 1630379.27, 559816.25, 160807.50, 595428.89, 387429.44, 10…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((31495.56 30..., MULTIPOLYGON …
raw_pop <- read_csv("data/aspatial/respopagesextod2011to2019.csv")
## Parsed with column specification:
## cols(
## PA = col_character(),
## SZ = col_character(),
## AG = col_character(),
## Sex = col_character(),
## TOD = col_character(),
## Pop = col_double(),
## Time = col_double()
## )
pop_age <- raw_pop %>%
group_by(SZ,PA,AG,Time) %>%
summarise(Pop = sum(Pop)) %>%
ungroup() %>%
filter(Time == 2019) %>%
spread(AG,Pop) %>%
mutate(Economy = `25_to_29` + `30_to_34` + `35_to_39`+`40_to_44`+`45_to_49`+`50_to_54`+`55_to_59`+`60_to_64`) %>%
mutate(Young = `0_to_4`+`5_to_9`+`10_to_14`+`15_to_19`+`20_to_24`) %>%
mutate(Aged = `65_to_69`+`70_to_74`+`75_to_79`+`80_to_84`+`85_to_89`+`90_and_over`) %>%
mutate_at(.vars = vars(PA, SZ), .funs = funs(toupper))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
pop_age <- subset(pop_age, select = -c(PA,Time,`0_to_4`,`5_to_9`,`10_to_14`,`15_to_19`,`20_to_24` , `25_to_29` , `30_to_34` , `35_to_39`,`40_to_44`,`45_to_49`,`50_to_54`,`55_to_59`,`60_to_64`,`65_to_69`,`70_to_74`,`75_to_79`,`80_to_84`,`85_to_89`,`90_and_over`))
pop_age <- pop_age %>%
group_by(SZ) %>%
mutate(total_pop = sum(Economy,Young,Aged)) %>%
mutate(Pop_density = sum(Economy,Young,Aged) )
pop_housing <- raw_pop %>%
group_by(SZ,PA,Time,TOD) %>%
summarise(Pop = sum(Pop)) %>%
ungroup()%>%
filter(Time == 2019) %>%
spread(TOD,Pop) %>%
mutate(`HDB1-2` = `HDB 1- and 2-Room Flats`) %>%
mutate(`HDB3-4` = `HDB 3-Room Flats` + `HDB 4-Room Flats` ) %>%
mutate(`HDB5-Ec` = `HDB 5-Room and Executive Flats`) %>%
mutate(`Cond_Apt` = `Condominiums and Other Apartments`) %>%
mutate(`Landed` = `Landed Properties`) %>%
mutate_at(.vars = vars(PA, SZ), .funs = funs(toupper))
pop_housing <- subset(pop_housing, select = -c(2:11))
pop_housing <- pop_housing %>%
group_by(SZ) %>%
mutate(dweller = sum(`HDB1-2`,`HDB3-4`,`HDB5-Ec`,Cond_Apt,Landed))
Combining both pop_age and pop_housing by using left_join function and name the complete geodemographic data set as “geodemographic”.
geodemographic <- left_join(pop_age, pop_housing)
## Joining, by = "SZ"
Gov_Embassy
Gov_emb <- st_read(dsn = "data/geospatial", layer = "Govt_Embassy")
## Reading layer `Govt_Embassy' from data source `/Users/mel/Desktop/take_home_exe3/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 443 features and 5 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 103.6282 ymin: 1.24911 xmax: 103.9884 ymax: 1.45765
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Gov_emb <- st_transform(Gov_emb, CRS("+init=EPSG:3414"))
class(Gov_emb)
## [1] "sf" "data.frame"
Gov_emb <- as_Spatial(Gov_emb)
Gov_emb
## class : SpatialPointsDataFrame
## features : 443
## extent : 5177.756, 45262.14, 25745.76, 48805.09 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## variables : 5
## names : POI_ID, SEQ_NUM, FAC_TYPE, POI_NAME, ST_NAME
## min values : 36439292, 1, 9525, ACC AND CORPORATE REGULATORY AUTH, AIRPORT BLVD
## max values : 1203262968, 2, 9993, WORLD BANK, YISHUN ST 81
Gov_emb_point <- poly.counts(Gov_emb,sg_sp)
sg_sf <- cbind.data.frame(sg_sf,Gov_emb_point)
Business differs slightly from rest of the urban functions as it includes industry in the same set of data. However, by looking at the data set, we are able to identify that those rows with FAC_TYPE 5000 belongs to Business and those with FAC_TYPE 9991 belongs to Industry.
Business <- st_read(dsn = "data/geospatial", layer = "Business") %>%
filter(FAC_TYPE == "5000")
## Reading layer `Business' from data source `/Users/mel/Desktop/take_home_exe3/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 6550 features and 5 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 103.6147 ymin: 1.24605 xmax: 104.0044 ymax: 1.4698
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Business <- st_transform(Business, CRS("+init=EPSG:3414"))
Business <- as_Spatial(Business)
Business
## class : SpatialPointsDataFrame
## features : 6440
## extent : 3669.148, 47034.83, 25408.41, 50148.54 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## variables : 5
## names : POI_ID, SEQ_NUM, FAC_TYPE, POI_NAME, ST_NAME
## min values : 36438017, 1, 5000, 1 FINLAYSON GREEN, ABINGDON RD
## max values : 1204290057, 3, 5000, ZUELLIG PHARMA, YUNG HO RD
Business_point <- poly.counts(Business,sg_sp)
sg_sf <- cbind.data.frame(sg_sf,Business_point)
Industry data set is extracted from Business data set.
Industry <- st_read(dsn = "data/geospatial", layer = "Business") %>%
filter(FAC_TYPE == "9991")
## Reading layer `Business' from data source `/Users/mel/Desktop/take_home_exe3/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 6550 features and 5 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 103.6147 ymin: 1.24605 xmax: 104.0044 ymax: 1.4698
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Industry <- st_transform(Industry, CRS("+init=EPSG:3414"))
Industry <- as_Spatial(Industry)
Industry
## class : SpatialPointsDataFrame
## features : 110
## extent : 4674.563, 46655.31, 26960.98, 49784.76 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## variables : 5
## names : POI_ID, SEQ_NUM, FAC_TYPE, POI_NAME, ST_NAME
## min values : 36438522, 1, 9991, 115A, 115B COMMONWEALTH DRIVE, ALPS AVE
## max values : 1202822668, 2, 9991, YISHUN INDUSTRIAL PARK A, YISHUN IND PARK A
Industry_point <- poly.counts(Industry,sg_sp)
sg_sf <- cbind.data.frame(sg_sf,Industry_point)
Financial <- st_read(dsn = "data/geospatial", layer = "Financial")
## Reading layer `Financial' from data source `/Users/mel/Desktop/take_home_exe3/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 3320 features and 29 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 103.6256 ymin: 1.24392 xmax: 103.9998 ymax: 1.46247
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Financial <- st_transform(Financial, CRS("+init=EPSG:3414"))
Financial <- as_Spatial(Financial)
Financial
## class : SpatialPointsDataFrame
## features : 3320
## extent : 4881.527, 46526.16, 25171.88, 49338.02 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## variables : 29
## names : LINK_ID, POI_ID, SEQ_NUM, FAC_TYPE, POI_NAME, POI_LANGCD, POI_NMTYPE, POI_ST_NUM, ST_NUM_FUL, ST_NFUL_LC, ST_NAME, ST_LANGCD, POI_ST_SD, ACC_TYPE, PH_NUMBER, ...
## min values : 116130476, 36438791, 1, 3578, 101 CREDIT, ENG, B, 1, 29A, ENG, ADMIRALTY RD W, ENG, L, NA, 18001111111, ...
## max values : 1224141214, 1204192646, 2, 6000, ZURCHER KANTONALBANK, ENG, J, 999, 8A, ENG, YUNG SHENG RD, ENG, R, NA, 90605353, ...
Financial_point <- poly.counts(Financial,sg_sp)
sg_sf <- cbind.data.frame(sg_sf,Financial_point)
shopping <- st_read(dsn = "data/geospatial", layer = "Shopping")
## Reading layer `Shopping' from data source `/Users/mel/Desktop/take_home_exe3/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 511 features and 5 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 103.679 ymin: 1.24779 xmax: 103.9644 ymax: 1.4535
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
shopping <- st_transform(shopping, CRS("+init=EPSG:3414"))
shopping <- as_Spatial(shopping)
shopping
## class : SpatialPointsDataFrame
## features : 511
## extent : 10824.78, 42586.69, 25599.8, 48346.17 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## variables : 5
## names : POI_ID, SEQ_NUM, FAC_TYPE, POI_NAME, ST_NAME
## min values : 36438094, 1, 6512, 1 NASSIM, AH HOOD RD
## max values : 1204344889, 3, 6512, ZHONGSHAN MALL, YISHUN AVE 2
shopping_point <- poly.counts(shopping,sg_sp)
sg_sf <- cbind.data.frame(sg_sf,shopping_point)
Residential <- st_read(dsn = "data/geospatial", layer = "Private residential")
## Reading layer `Private residential' from data source `/Users/mel/Desktop/take_home_exe3/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 3604 features and 5 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 103.6295 ymin: 1.23943 xmax: 103.9749 ymax: 1.45379
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Residential <- st_transform(Residential, CRS("+init=EPSG:3414"))
Residential <- as_Spatial(Residential)
Residential
## class : SpatialPointsDataFrame
## features : 3604
## extent : 5316.959, 43760.83, 24675.4, 48378.23 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## variables : 5
## names : POI_ID, SEQ_NUM, FAC_TYPE, POI_NAME, ST_NAME
## min values : 36439981, 1, 9590, 1 BALMORAL ROAD, 1ST ST
## max values : 1204180752, 2, 9590, ZION MANSION, ZION RD
Residential_point <- poly.counts(Residential,sg_sp)
sg_sf <- cbind.data.frame(sg_sf,Residential_point)
Combining latest set of sg_sf with geodemographic according their common variables which is their Subzone name. However, in sg_sf the subzone column named it as “SUBZONE_N” and in geodemographic the subzone column named it as “SZ”. Therefore, we need to use “by” to join their both data set together.
output <- left_join(sg_sf, geodemographic, by = c("SUBZONE_N" = "SZ"))
## Warning: Column `SUBZONE_N`/`SZ` joining factor and character vector, coercing
## into character vector
The unit of measurement of the values for number of people in each age group are number of total population within each subzone.
Using these values directly will be baised by the underlying total number of population.
The unit of measurement of the values for number of people in each housing group are number of total population within each subzone.
Using these values directly will be bais by the underlying total number of population.
In order to overcome this problem, we will derive the penetration rate of each variable
output <- output %>%
mutate(Economy_PR = `Economy`/`total_pop`*100) %>%
mutate(Young_PR = `Young`/`total_pop`*100) %>%
mutate(Aged_PR = `Aged`/`total_pop`*100) %>%
mutate(Density = `Pop_density`/`SHAPE_Area`*100) %>%
mutate(`HDB1-2RM_PR` = `HDB1-2`/`dweller`*100) %>%
mutate(`HDB3-4RM_PR` = `HDB3-4`/`dweller`*100) %>%
mutate(`HDB5RM-Ec_PR` = `HDB5-Ec`/`dweller`*100) %>%
mutate(`Cond_Apt_PR` = `Cond_Apt`/`dweller`*100) %>%
mutate(`Landed_PR` = `Landed`/`dweller`*100)
After all the data preparation, now we can remove those unwanted columns which do not plays any analysis roles in it.
output <- subset(output, select = -c(1:2,4:15,23:32,dweller))
summary(output)
## SUBZONE_N geometry Gov_emb_point Business_point
## Length:323 MULTIPOLYGON :323 Min. : 0.000 Min. : 0.00
## Class :character epsg:3414 : 0 1st Qu.: 0.000 1st Qu.: 0.00
## Mode :character +proj=tmer...: 0 Median : 0.000 Median : 2.00
## Mean : 1.372 Mean : 19.94
## 3rd Qu.: 1.000 3rd Qu.: 14.00
## Max. :19.000 Max. :308.00
##
## Industry_point Financial_point shopping_point Residential_point
## Min. :0.0000 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 1.00 1st Qu.: 0.000 1st Qu.: 0.00
## Median :0.0000 Median : 5.00 Median : 0.000 Median : 4.00
## Mean :0.3406 Mean : 10.28 Mean : 1.582 Mean : 11.16
## 3rd Qu.:0.0000 3rd Qu.: 13.00 3rd Qu.: 1.000 3rd Qu.: 11.00
## Max. :8.0000 Max. :134.00 Max. :31.000 Max. :217.00
##
## Economy_PR Young_PR Aged_PR Density
## Min. : 5.263 Min. : 0.00 Min. : 0.00 Min. :0.000
## 1st Qu.:57.064 1st Qu.:21.46 1st Qu.:10.75 1st Qu.:0.000
## Median :59.023 Median :25.31 Median :15.23 Median :0.591
## Mean :59.611 Mean :24.75 Mean :15.64 Mean :1.074
## 3rd Qu.:60.549 3rd Qu.:28.62 3rd Qu.:19.42 3rd Qu.:1.993
## Max. :90.000 Max. :55.88 Max. :94.74 Max. :4.606
## NA's :89 NA's :89 NA's :89
## HDB1-2RM_PR HDB3-4RM_PR HDB5RM-Ec_PR Cond_Apt_PR
## Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 2.675
## Median : 0.000 Median :40.27 Median :14.40 Median : 14.571
## Mean : 4.037 Mean :35.50 Mean :16.41 Mean : 30.746
## 3rd Qu.: 4.794 3rd Qu.:60.67 3rd Qu.:25.94 3rd Qu.: 49.172
## Max. :71.293 Max. :94.81 Max. :83.64 Max. :100.000
## NA's :95 NA's :95 NA's :95 NA's :95
## Landed_PR
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 0.00
## Mean : 13.30
## 3rd Qu.: 14.52
## Max. :100.00
## NA's :95
output$Economy_PR[is.nan(output$Economy_PR)] = 0
output$Young_PR[is.nan(output$Young_PR)] = 0
output$Aged_PR[is.nan(output$Aged_PR)] = 0
output$`HDB1-2RM_PR`[is.nan(output$`HDB1-2RM_PR`)] = 0
output$`HDB3-4RM_PR`[is.nan(output$`HDB3-4RM_PR`)] = 0
output$`HDB5RM-Ec_PR`[is.nan(output$`HDB5RM-Ec_PR`)] = 0
output$Cond_Apt_PR[is.nan(output$Cond_Apt_PR)] = 0
output$Landed_PR[is.nan(output$Landed_PR)] = 0
cluster_vars.cor = cor(output[,3:17])
corrplot.mixed(cluster_vars.cor,
lower = "ellipse",
upper = "number",
tl.pos = "lt",
diag = "l",
tl.col = "black",
number.cex = .5)
cluster_vars <- output %>%
st_set_geometry(NULL)
head(cluster_vars,10)
## SUBZONE_N Gov_emb_point Business_point Industry_point Financial_point
## 1 MARINA SOUTH 0 0 0 3
## 2 PEARL'S HILL 1 6 0 25
## 3 BOAT QUAY 2 40 0 2
## 4 HENDERSON HILL 0 0 0 4
## 5 REDHILL 0 2 0 12
## 6 ALEXANDRA HILL 7 39 1 15
## 7 BUKIT HO SWEE 4 6 0 6
## 8 CLARKE QUAY 4 12 0 19
## 9 PASIR PANJANG 1 0 16 0 4
## 10 QUEENSWAY 0 0 0 2
## shopping_point Residential_point Economy_PR Young_PR Aged_PR Density
## 1 0 0 0.00000 0.00000 0.00000 0.00000000
## 2 3 6 51.58371 16.59125 31.82504 1.18431719
## 3 1 1 71.42857 0.00000 28.57143 0.04353031
## 4 0 5 56.05381 19.58146 24.36472 2.24711973
## 5 1 6 58.34110 26.46785 15.19105 2.76953656
## 6 1 4 54.86212 21.11756 24.02032 1.33737225
## 7 1 11 56.42760 19.28281 24.28958 2.67883659
## 8 10 6 83.33333 0.00000 16.66667 0.02067649
## 9 0 56 62.07675 25.28217 12.64108 0.40837310
## 10 1 1 72.41379 10.34483 17.24138 0.04591192
## HDB1-2RM_PR HDB3-4RM_PR HDB5RM-Ec_PR Cond_Apt_PR Landed_PR
## 1 0.00000 0.00000 0.000000 0.000000 0.0
## 2 71.29338 22.08202 0.000000 6.624606 0.0
## 3 0.00000 0.00000 0.000000 100.000000 0.0
## 4 29.37220 59.71599 9.491779 1.420030 0.0
## 5 18.48030 33.02064 30.393996 18.105066 0.0
## 6 29.28839 47.34082 23.370787 0.000000 0.0
## 7 25.08475 58.37288 12.881356 3.661017 0.0
## 8 0.00000 0.00000 0.000000 100.000000 0.0
## 9 0.00000 0.00000 0.000000 67.500000 32.5
## 10 0.00000 0.00000 0.000000 100.000000 0.0
row.names(cluster_vars) <- cluster_vars$"SUBZONE_N"
head(cluster_vars,10)
## SUBZONE_N Gov_emb_point Business_point Industry_point
## MARINA SOUTH MARINA SOUTH 0 0 0
## PEARL'S HILL PEARL'S HILL 1 6 0
## BOAT QUAY BOAT QUAY 2 40 0
## HENDERSON HILL HENDERSON HILL 0 0 0
## REDHILL REDHILL 0 2 0
## ALEXANDRA HILL ALEXANDRA HILL 7 39 1
## BUKIT HO SWEE BUKIT HO SWEE 4 6 0
## CLARKE QUAY CLARKE QUAY 4 12 0
## PASIR PANJANG 1 PASIR PANJANG 1 0 16 0
## QUEENSWAY QUEENSWAY 0 0 0
## Financial_point shopping_point Residential_point Economy_PR
## MARINA SOUTH 3 0 0 0.00000
## PEARL'S HILL 25 3 6 51.58371
## BOAT QUAY 2 1 1 71.42857
## HENDERSON HILL 4 0 5 56.05381
## REDHILL 12 1 6 58.34110
## ALEXANDRA HILL 15 1 4 54.86212
## BUKIT HO SWEE 6 1 11 56.42760
## CLARKE QUAY 19 10 6 83.33333
## PASIR PANJANG 1 4 0 56 62.07675
## QUEENSWAY 2 1 1 72.41379
## Young_PR Aged_PR Density HDB1-2RM_PR HDB3-4RM_PR
## MARINA SOUTH 0.00000 0.00000 0.00000000 0.00000 0.00000
## PEARL'S HILL 16.59125 31.82504 1.18431719 71.29338 22.08202
## BOAT QUAY 0.00000 28.57143 0.04353031 0.00000 0.00000
## HENDERSON HILL 19.58146 24.36472 2.24711973 29.37220 59.71599
## REDHILL 26.46785 15.19105 2.76953656 18.48030 33.02064
## ALEXANDRA HILL 21.11756 24.02032 1.33737225 29.28839 47.34082
## BUKIT HO SWEE 19.28281 24.28958 2.67883659 25.08475 58.37288
## CLARKE QUAY 0.00000 16.66667 0.02067649 0.00000 0.00000
## PASIR PANJANG 1 25.28217 12.64108 0.40837310 0.00000 0.00000
## QUEENSWAY 10.34483 17.24138 0.04591192 0.00000 0.00000
## HDB5RM-Ec_PR Cond_Apt_PR Landed_PR
## MARINA SOUTH 0.000000 0.000000 0.0
## PEARL'S HILL 0.000000 6.624606 0.0
## BOAT QUAY 0.000000 100.000000 0.0
## HENDERSON HILL 9.491779 1.420030 0.0
## REDHILL 30.393996 18.105066 0.0
## ALEXANDRA HILL 23.370787 0.000000 0.0
## BUKIT HO SWEE 12.881356 3.661017 0.0
## CLARKE QUAY 0.000000 100.000000 0.0
## PASIR PANJANG 1 0.000000 67.500000 32.5
## QUEENSWAY 0.000000 100.000000 0.0
sg_geo <- dplyr :: select(cluster_vars, c(2:16))
proxmat <- dist(sg_geo, method = 'euclidean')
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
ac <- function(x) {
agnes(sg_geo, method = x)$ac
}
map_dbl(m, ac)
## average single complete ward
## 0.9286389 0.8688179 0.9525380 0.9836760
hclust_ward <- hclust(proxmat, method = 'ward.D')
plot(hclust_ward, cex = 0.1)
fviz_nbclust(sg_geo, hcut, method = "wss")
plot(hclust_ward, cex = 0.1)
rect.hclust(hclust_ward, k = 5, border = 2:5)
Transforming the data frame into a matrix
sg_geo_mat <- data.matrix(sg_geo)
heatmaply(normalize(sg_geo_mat),
Colv=NA,
dist_method = "euclidean",
hclust_method = "ward.D",
seriate = "OLO",
colors = Blues,
k_row = 5,
margins = c(NA,200,60,NA),
fontsize_row = 4,
fontsize_col = 5,
main="Geographic Segmentation of Singapore Subzone",
xlab = "Indicators",
ylab = "Subzone Name"
)
## Warning in doTryCatch(return(expr), name, parentenv, handler): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
## dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 6): Library not loaded: /opt/X11/lib/libSM.6.dylib
## Referenced from: /Library/Frameworks/R.framework/Resources/modules//R_X11.so
## Reason: image not found
groups <- as.factor(cutree(hclust_ward, k=5))
sg_sf_cluster <- cbind(sg_sf, as.matrix(groups)) %>%
rename(`CLUSTER`= `as.matrix(groups)`)
sg_sf_cluster <- st_as_sf(sg_sf_cluster)
qtm(sg_sf_cluster, "CLUSTER")
## Warning: The shape sg_sf_cluster is invalid. See sf::st_is_valid
sg_sf_cluster_all <- left_join(sg_sf_cluster, sg_geo)
## Joining, by = c("Gov_emb_point", "Business_point", "Industry_point", "Financial_point", "shopping_point", "Residential_point")
sg_sf_cluster1 <- sg_sf_cluster_all %>%
filter(CLUSTER == 1) %>%
st_set_geometry(NULL)
sg_sf_cluster1 = subset(sg_sf_cluster1, select = -c(1:15,CLUSTER))
Gov_emb_point <- ggplot(data=sg_sf_cluster1,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_cluster1,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_cluster1,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_cluster1,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_cluster1,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_cluster1,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_cluster1,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_cluster1,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_cluster1,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_cluster1,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_cluster1,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_cluster1,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_cluster1,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_cluster1,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 1: represented by the colour green, it is noted this cluster is high in business,financial and Gov_emb
Cluster 2
sg_sf_cluster2 <- sg_sf_cluster_all %>%
filter(CLUSTER == 2) %>%
st_set_geometry(NULL)
sg_sf_cluster2 = subset(sg_sf_cluster2, select = -c(1:15,CLUSTER))
Gov_emb_point <- ggplot(data=sg_sf_cluster2,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_cluster2,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_cluster2,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_cluster2,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_cluster2,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_cluster2,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_cluster2,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_cluster2,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_cluster2,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_cluster2,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_cluster2,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_cluster2,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_cluster2,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_cluster2,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 2: represented by the colour yellow, this cluster is high in Economy group, young group, landed and cond_apt
Cluster 3
sg_sf_cluster3 <- sg_sf_cluster_all %>%
filter(CLUSTER == 3) %>%
st_set_geometry(NULL)
sg_sf_cluster3 = subset(sg_sf_cluster3, select = -c(1:15,CLUSTER))
Gov_emb_point <- ggplot(data=sg_sf_cluster3,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_cluster3,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_cluster3,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_cluster3,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_cluster3,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_cluster3,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_cluster3,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_cluster3,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_cluster3,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_cluster3,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_cluster3,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_cluster3,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_cluster3,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_cluster3,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 3:represented by the colour purple, this cluster is high in Economy group, Young group, cond_apt and finacial
Cluster 4
sg_sf_cluster4 <- sg_sf_cluster_all %>%
filter(CLUSTER == 4) %>%
st_set_geometry(NULL)
sg_sf_cluster4 = subset(sg_sf_cluster4, select = -c(1:15,CLUSTER))
Gov_emb_point <- ggplot(data=sg_sf_cluster4,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_cluster4,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_cluster4,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_cluster4,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_cluster4,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_cluster4,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_cluster4,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_cluster4,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_cluster4,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_cluster4,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_cluster4,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_cluster4,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_cluster4,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_cluster4,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 4: represented by the colour red, this cluster ishigh in financial, residentail, Economy and HDB 3-4 Room
Cluster 5
sg_sf_cluster5 <- sg_sf_cluster_all %>%
filter(CLUSTER == 5) %>%
st_set_geometry(NULL)
sg_sf_cluster5 = subset(sg_sf_cluster5, select = -c(1:15,CLUSTER))
Gov_emb_point <- ggplot(data=sg_sf_cluster5,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_cluster5,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_cluster5,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_cluster5,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_cluster5,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_cluster5,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_cluster5,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_cluster5,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_cluster5,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_cluster5,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_cluster5,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_cluster5,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_cluster5,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_cluster5,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 5: represented by the colour blue, this cluster is high in business and Industry
Spatially Constrained Clustering - SKATER approach
Computing Neighbour List
sg.nb <- poly2nb(sg_sp)
summary(sg.nb)
## Neighbour list object:
## Number of regions: 323
## Number of nonzero links: 1934
## Percentage nonzero weights: 1.853751
## Average number of links: 5.987616
## 5 regions with no links:
## 17 18 19 295 302
## Link number distribution:
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 14 17
## 5 2 6 10 26 77 87 51 34 16 3 3 1 1 1
## 2 least connected regions:
## 16 234 with 1 link
## 1 most connected region:
## 313 with 17 links
sg_sp$SUBZONE_N[17]
## [1] SUDONG
## 323 Levels: ADMIRALTY AIRPORT ROAD ALEXANDRA HILL ALEXANDRA NORTH ... YUNNAN
sg_sp$SUBZONE_N[18]
## [1] SEMAKAU
## 323 Levels: ADMIRALTY AIRPORT ROAD ALEXANDRA HILL ALEXANDRA NORTH ... YUNNAN
sg_sp$SUBZONE_N[19]
## [1] SOUTHERN GROUP
## 323 Levels: ADMIRALTY AIRPORT ROAD ALEXANDRA HILL ALEXANDRA NORTH ... YUNNAN
sg_sp$SUBZONE_N[295]
## [1] PULAU SELETAR
## 323 Levels: ADMIRALTY AIRPORT ROAD ALEXANDRA HILL ALEXANDRA NORTH ... YUNNAN
sg_sp$SUBZONE_N[302]
## [1] NORTH-EASTERN ISLANDS
## 323 Levels: ADMIRALTY AIRPORT ROAD ALEXANDRA HILL ALEXANDRA NORTH ... YUNNAN
sg.nb <- subset(sg.nb, subset=card(sg.nb) > 0)
summary(sg.nb)
## Neighbour list object:
## Number of regions: 318
## Number of nonzero links: 1934
## Percentage nonzero weights: 1.912503
## Average number of links: 6.081761
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 14 17
## 2 6 10 26 77 87 51 34 16 3 3 1 1 1
## 2 least connected regions:
## 16 234 with 1 link
## 1 most connected region:
## 313 with 17 links
sg_sp1 <-sg_sf[-c(17,18,19,295,302),]
sg_sp1 <- st_as_sf(sg_sp1)
sg_sp1 <- as_Spatial(sg_sp1)
plot(sg_sp1, border=grey(.5))
plot(sg.nb, coordinates(sg_sp1), col="blue", add=TRUE)
Calculating edge costs
lcosts <- nbcosts(sg.nb, sg_geo)
sg.w <- nb2listw(sg.nb, lcosts, style="B")
summary(sg.w)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 318
## Number of nonzero links: 1934
## Percentage nonzero weights: 1.912503
## Average number of links: 6.081761
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 14 17
## 2 6 10 26 77 87 51 34 16 3 3 1 1 1
## 2 least connected regions:
## 16 234 with 1 link
## 1 most connected region:
## 313 with 17 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 318 101124 188090 47938346 535809272
The minimum spanning tree is computed
sg.mst <- mstree(sg.w)
class(sg.mst)
## [1] "mst" "matrix"
dim(sg.mst)
## [1] 317 3
Display the content of sg.mst by using head()
head(sg.mst)
## [,1] [,2] [,3]
## [1,] 56 96 106.23118
## [2,] 96 63 70.24145
## [3,] 63 82 45.03377
## [4,] 82 117 58.24637
## [5,] 117 51 76.56425
## [6,] 51 43 73.27486
plot(sg_sp1, border=gray(.5))
plot.mst(sg.mst, coordinates(sg_sp1),
col="blue", cex.lab=0.7, cex.circles=0.005, add=TRUE)
Compute the spatially constrained cluster using skater()
clust5 <- skater(sg.mst[,1:2], sg_geo, 4)
str(clust5)
## List of 8
## $ groups : num [1:318] 1 1 1 1 1 1 1 1 1 1 ...
## $ edges.groups:List of 5
## ..$ :List of 3
## .. ..$ node: num [1:262] 146 96 92 161 164 42 69 259 40 117 ...
## .. ..$ edge: num [1:261, 1:3] 161 96 69 92 42 164 92 40 192 117 ...
## .. ..$ ssw : num 18261
## ..$ :List of 3
## .. ..$ node: num [1:5] 149 140 112 143 184
## .. ..$ edge: num [1:4, 1:3] 149 149 140 112 140 ...
## .. ..$ ssw : num 209
## ..$ :List of 3
## .. ..$ node: num [1:29] 312 285 298 316 292 294 289 265 303 313 ...
## .. ..$ edge: num [1:28, 1:3] 285 285 280 292 265 279 294 289 316 296 ...
## .. ..$ ssw : num 1530
## ..$ :List of 3
## .. ..$ node: num [1:10] 32 22 21 26 17 11 48 18 75 19
## .. ..$ edge: num [1:9, 1:3] 26 11 48 21 21 17 32 18 22 11 ...
## .. ..$ ssw : num 205
## ..$ :List of 3
## .. ..$ node: num [1:12] 134 194 135 102 106 136 156 138 195 167 ...
## .. ..$ edge: num [1:11, 1:3] 194 194 156 136 106 102 138 194 135 195 ...
## .. ..$ ssw : num 749
## $ not.prune : NULL
## $ candidates : int [1:5] 1 2 3 4 5
## $ ssto : num 23366
## $ ssw : num [1:5] 23366 22425 21872 21322 20954
## $ crit : num [1:2] 1 Inf
## $ vec.crit : num [1:318] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "class")= chr "skater"
ccs5 <- clust5$groups
ccs5
## [1] 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 4 4 4 1 4 4 1 1 1 4 1 1 1 1 1 4 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [75] 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 1 5 5 1 1 5 1
## [112] 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 5 5 1 5 1 2 1 1 2 1 1 1 1 1
## [149] 2 1 1 1 1 1 1 5 1 1 1 1 1 1 1 1 1 1 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
## [186] 1 1 1 1 1 1 1 1 5 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1
## [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1
## [260] 1 3 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 3 1 3 3 3 1 3 3 1 3 3 3
## [297] 3 3 3 1 3 3 3 3 1 3 1 1 1 1 1 3 3 1 3 3 3 3
table(ccs5)
## ccs5
## 1 2 3 4 5
## 262 5 29 10 12
plot(sg_sp1, border=gray(.5))
plot(clust5, coordinates(sg_sp1), cex.lab=.7,
groups.colors=c("red","green","blue", "brown", "pink"), cex.circles=0.005, add=TRUE)
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
sg_sf_cluster <-sg_sf_cluster[-c(17,18,19,295,302),]
groups_mat <- as.matrix(clust5$groups)
sg_sf_spatialcluster <- cbind(sg_sf_cluster, as.factor(groups_mat)) %>%
rename(`SP_CLUSTER`=`as.factor.groups_mat.`)
qtm(sg_sf_spatialcluster, "SP_CLUSTER")
## Warning: The shape sg_sf_spatialcluster is invalid. See sf::st_is_valid
sg_sf_spatialcluster <- left_join(sg_sf_spatialcluster, sg_geo)
## Joining, by = c("Gov_emb_point", "Business_point", "Industry_point", "Financial_point", "shopping_point", "Residential_point")
sg_sf_spatialcluster1 <- sg_sf_spatialcluster %>%
filter(SP_CLUSTER == 1) %>%
st_set_geometry(NULL)
sg_sf_spatialcluster1 = subset(sg_sf_spatialcluster1, select = -c(1:15))
Gov_emb_point <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_spatialcluster1,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 1: represented by the colour green, this cluster mainly covers the majority of Singapore. This is probably because of the planning from the Singapore government where they ensure that all residents within that particular subzones has certain urban functions, for example, within one subzone, there are banking services, shopping malls and all these characteristics applirs to government subzones too. There cluster thus have no unique features/variables.
Cluster 2:
sg_sf_spatialcluster2 <- sg_sf_spatialcluster %>%
filter(SP_CLUSTER == 2) %>%
st_set_geometry(NULL)
sg_sf_spatialcluster2 = subset(sg_sf_spatialcluster2, select = -c(1:15))
Gov_emb_point <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_spatialcluster2,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 2: represented by the colour yellow, this cluster is high in business,financial, Economy and HDB5Rm-EC. This cluster is located at Jurong area. It is an area that the government has focused on building up businesses. It houses several long-established reputable business brands due to its low rental cost, far away from the CBD area where rental costs tends to be on the higher end of the spectrum.
Cluster 3
sg_sf_spatialcluster3 <- sg_sf_spatialcluster %>%
filter(SP_CLUSTER == 3) %>%
st_set_geometry(NULL)
sg_sf_spatialcluster3 = subset(sg_sf_spatialcluster3, select = -c(1:15))
Gov_emb_point <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_spatialcluster3,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 3: represented by the colour purple, this cluster is high in Financial, Economy, HBD 5RM. This cluster is mainly located in Woodlands. It is known to be a residential area with 5 room because of the low pricing as it is located relatively further away from the CBD area of Singpaore, which is at Orchard. Hence, people of the lower income and larger families will be more inclined to looking into such residential areas. Alsp, as we know that woodlands is near to the Custom border, economy tends to be higher due to the influx and outflux exchange of goods and services between Singapore and Malaysia. Hence, it is understandable that manmny industrials will be located near the custom to reduce delivery cost.
Cluster 4
sg_sf_spatialcluster4 <- sg_sf_spatialcluster %>%
filter(SP_CLUSTER == 4) %>%
st_set_geometry(NULL)
sg_sf_spatialcluster4 = subset(sg_sf_spatialcluster4, select = -c(1:15))
Gov_emb_point <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_spatialcluster4,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 4: represented by the colour red, this cluster is high in business,financial, Economy and HBD3-4RM This is mainly located at Harbourfront. They have the seaport located right in that subzone where foreign goods and services are being brought in and out of Singapore. Hence, this accounts to the financial and economy side of Singapore. As shown, it also covers Sentosa island, in which Singpaore has largelyu developed that area to attract tourists. Attractions such as the Casino as well as Hard Rock Hotel, SEA Aquarium and even Universal Studios Singapore has definitely boosts the economy and business around that area and generated much income towards the Singapore GDP. As for housing, the houses built in that cluster is likley to be facing the sea, hence, HDB would cater more for 3-4 room due to the smaller land space and to accomodate as much rooms/estates around that area to increase population, hence higher in 3-4 rooms than 5 rooms. Cluster 5
sg_sf_spatialcluster5 <- sg_sf_spatialcluster %>%
filter(SP_CLUSTER == 5) %>%
st_set_geometry(NULL)
sg_sf_spatialcluster5 = subset(sg_sf_spatialcluster5, select = -c(1:15))
Gov_emb_point <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Gov_emb_point`)) +
geom_boxplot()
Business_point <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Business_point`)) +
geom_boxplot()
Industry_point <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Industry_point`)) +
geom_boxplot()
Financial_point <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Financial_point`)) +
geom_boxplot()
shopping_point <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `shopping_point`)) +
geom_boxplot()
Residential_point <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Residential_point`)) +
geom_boxplot()
Economy_PR <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Economy_PR`)) +
geom_boxplot()
Young_PR <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Young_PR`)) +
geom_boxplot()
Aged_PR <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Aged_PR`)) +
geom_boxplot()
`HDB3-4RM_PR` <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `HDB3-4RM_PR`)) +
geom_boxplot()
`HDB1-2RM_PR` <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `HDB1-2RM_PR`)) +
geom_boxplot()
`HDB5RM-Ec_PR` <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `HDB5RM-Ec_PR`)) +
geom_boxplot()
Cond_Apt_PR <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Cond_Apt_PR`)) +
geom_boxplot()
Landed_PR <- ggplot(data=sg_sf_spatialcluster5,
aes(x= `Landed_PR`)) +
geom_boxplot()
ggarrange(Gov_emb_point, Business_point, Industry_point, Financial_point, shopping_point, Residential_point, Economy_PR, Young_PR, Aged_PR, `HDB3-4RM_PR`, `HDB1-2RM_PR`,`HDB5RM-Ec_PR`,Cond_Apt_PR,Landed_PR,
ncol = 4,
nrow = 4)
Cluster 5: represented by the colour blue, this cluster is high in business,financial, HBD5RM-EC and Economy. This is mainly located at the CBD town area where it is known as the heart of Singapore. Several pretigious shopping malls are located at the heart of Singapore where events are always regularly held there. Hence, accounting for the business, financial and economy. As for the HDB5RM-EC, they are more inclined towards EC as compared to HDB is due to the high prices in the CBD area and using expatrates stay around such districts, such as district 9, 10 and 11. Hence, the EC, would befitting of their status as compared to HDB. Also, the presence of HDBs around that area may pull down the housing prices of the surrounding buildings.