How Singapore’s Dependent Population is distributed over the country
Import the MP14_SUBZONE_WEB_PL shapefile into R
# Load the MP14_SUBZONE_WEB_PL layer from the geospatial data folder.
mpsz <- st_read(
  dsn = "data/geospatial",
  layer = "MP14_SUBZONE_WEB_PL"
)
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `D:\SMU-MITB\Term 2\ISSS608 Visual Analytics and Applications\Class 11\Ice\Ice 2\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
#st_read() comes from the sf ("simple features") package.
#It reads the shapefile stored in the geospatial folder into R and returns it as a simple feature collection (a data table with a geometry column).
Importing the population dataset
# Import the population CSV with readr's read_csv(), which returns a tibble:
# printing it shows only the first 10 rows, whereas a data.frame from base
# read.csv() would print every row.
popagsex <- read_csv(file = "data/aspatial/respopagsex2000to2018.csv")
## Parsed with column specification:
## cols(
## PA = col_character(),
## SZ = col_character(),
## AG = col_character(),
## Sex = col_character(),
## Pop = col_double(),
## Time = col_double()
## )
Aggregating Age as per dependency characteristics
# Build the 2018 male dependency table, one row per PA / SZ combination.
# Steps: keep the male rows for 2018, spread the age groups (AG) into one
# column each, add them up into YOUNG / ECONOMY ACTIVE / AGED / TOTAL, and
# compute DEPENDENCY = (YOUNG + AGED) / ECONOMY ACTIVE.
#
# The rowSums() calls pick columns by position in the piped-in data frame
# (`.`). After spread() the leading columns are PA, SZ, Sex and Time, and the
# age-group columns follow from column 5 in sorted (character) order, which is
# presumably why position 14 (`5_to_9`) is skipped in ECONOMY ACTIVE.
# NOTE(review): AGED and TOTAL both stop at column 22. If the data contains a
# further age band at column 23 (e.g. a "90 and over" group), it is left out
# of both sums -- confirm against names() of the spread table.
popagsex2018_male <- popagsex %>%
  filter(Sex == "Males", Time == 2018) %>%
  spread(AG, Pop) %>%
  mutate(
    YOUNG = `0_to_4` + `5_to_9` + `10_to_14` + `15_to_19` + `20_to_24`
  ) %>%
  mutate(`ECONOMY ACTIVE` = rowSums(.[9:13]) + rowSums(.[15:17])) %>%
  mutate(AGED = rowSums(.[18:22])) %>%
  mutate(TOTAL = rowSums(.[5:22])) %>%
  mutate(DEPENDENCY = (YOUNG + AGED) / `ECONOMY ACTIVE`) %>%
  # Pass toupper directly; funs() is deprecated as of dplyr 0.8.0.
  # Upper-casing is presumably needed so SZ matches the shapefile's SUBZONE_N.
  mutate_at(.vars = vars(PA, SZ), .funs = toupper) %>%
  dplyr::select(PA, SZ, YOUNG, `ECONOMY ACTIVE`, AGED, TOTAL, DEPENDENCY) %>%
  # Rows with no economically active males would have an Inf/NaN ratio.
  filter(`ECONOMY ACTIVE` > 0)
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
# Attach the dependency figures to the subzone polygons. Every polygon is kept;
# rows are matched on subzone name (SUBZONE_N in mpsz, SZ in the table).
mpsz_agemale2018 <- mpsz %>%
  left_join(popagsex2018_male, by = c("SUBZONE_N" = "SZ"))
## Warning: Column `SUBZONE_N`/`SZ` joining factor and character vector, coercing
## into character vector
# Quick thematic map of the subzones, shaded by DEPENDENCY.
qtm(shp = mpsz_agemale2018, fill = "DEPENDENCY")
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

# tm_polygons() is a drawing layer (comparable to geom_bar() and friends in
# ggplot2): it adds shaded polygons for the shape given to tm_shape().
tm_shape(shp = mpsz_agemale2018) +
  tm_polygons()
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

# Map a variable onto the polygons so the shading reflects its values.
tm_shape(shp = mpsz_agemale2018) +
  tm_polygons(col = "DEPENDENCY")
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

# Same map assembled from separate layers: tm_fill() for the shading and
# tm_borders() for thin, semi-transparent blue outlines.
tm_shape(shp = mpsz_agemale2018) +
  tm_fill(col = "DEPENDENCY") +
  tm_borders(col = "Blue", lwd = 0.3, alpha = 0.6)
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

Plotting the Dependent population
# Choropleth of DEPENDENCY cut into ten quantile classes.
tm_shape(shp = mpsz_agemale2018) +
  tm_fill(
    col = "DEPENDENCY",
    n = 10,              # number of classes
    style = "quantile",  # classification method used to form the classes
    palette = "Purples"
  ) +
  tm_borders(alpha = 0.5)
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

# Quantile choropleth of DEPENDENCY with a histogram in the legend, which is
# drawn outside the map frame.
tm_shape(mpsz_agemale2018) +
  tm_fill(
    "DEPENDENCY",
    style = "quantile",
    palette = "Purples",
    legend.hist = TRUE,  # FALSE by default
    legend.is.portrait = TRUE,
    legend.hist.z = 0.1
  ) +
  tm_layout(
    # The polygons are planning subzones (joined on SUBZONE_N), so the title
    # says "subzone" to agree with the other maps of this data.
    main.title = "Distribution of Dependency Ratio \nby planning subzone",
    main.title.position = "center",
    legend.height = 0.45,
    legend.width = 0.35,
    legend.outside = TRUE,
    # Accepts left/right and top/bottom, but only takes effect when
    # legend.outside is FALSE.
    legend.position = c("right", "bottom"),
    frame = FALSE
  ) +
  tm_borders(alpha = 0.5)
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

# Presentation version of the map: quantile choropleth plus compass, scale bar,
# grid lines and a data-source credit.
tm_shape(mpsz_agemale2018) +
  tm_fill(
    "DEPENDENCY",
    style = "quantile",
    palette = "Purples",
    # DEPENDENCY is a ratio, not a head count, so label the legend accordingly.
    title = "Dependency ratio"
  ) +
  tm_layout(
    main.title = "Distribution of Dependency Ratio \nby planning subzone",
    main.title.position = "center",
    main.title.size = 1.2,
    legend.height = 0.45,
    legend.width = 0.35,
    frame = TRUE
  ) +
  tm_borders(alpha = 0.5) +
  tm_compass(type = "radar", size = 2) +
  tm_scale_bar(width = 0.15) +
  tm_grid(lwd = 0.02, alpha = 0.3) +
  tm_credits(
    "Source: Planning Sub-zone boundary from Urban Redevelopment Authority (URA)\n and Population data from Department of Statistics DOS",
    position = c("left", "bottom")
  )
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

# Two side-by-side maps of the same variable, to compare the "equal" and
# "quantile" classification methods.
tm_shape(shp = mpsz_agemale2018) +
  tm_polygons(
    col = c("DEPENDENCY", "DEPENDENCY"),
    style = c("equal", "quantile"),
    palette = list("Purples", "Purples")
  ) +
  tm_layout(legend.position = c("right", "bottom"))
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

# Small multiples: one quantile choropleth per region (REGION_N), each panel
# with its own coordinate range.
tm_shape(mpsz_agemale2018) +
  tm_fill(
    "DEPENDENCY",
    style = "quantile",
    palette = "Purples",
    thres.poly = 0
  ) +
  tm_facets(
    by = "REGION_N",
    free.coords = TRUE,
    # drop.shapes was renamed to drop.units; the old name is deprecated.
    drop.units = TRUE
  ) +
  tm_layout(
    legend.show = FALSE,
    title.position = c("center", "center"),
    title.size = 20
  ) +
  tm_borders(alpha = 0.5)
## Warning: The argument drop.shapes has been renamed to drop.units, and is
## therefore deprecated
## Warning: The shape mpsz_agemale2018 is invalid. See sf::st_is_valid

Zooming into the Central Region
# Restrict the map to rows whose REGION_N is "CENTRAL REGION" and draw the
# quantile choropleth with a histogram legend placed outside the frame.
# NOTE(review): legend.width = 5.0 is far larger than the 0.35 used for the
# other maps -- confirm it is intended.
tm_shape(
  shp = mpsz_agemale2018[mpsz_agemale2018$REGION_N == "CENTRAL REGION", ]
) +
  tm_fill(
    col = "DEPENDENCY",
    style = "quantile",
    palette = "Purples",
    legend.hist = TRUE,
    legend.is.portrait = TRUE,
    legend.hist.z = 0.1
  ) +
  tm_layout(
    legend.outside = TRUE,
    legend.height = 0.45,
    legend.width = 5.0,
    legend.position = c("right", "bottom"),
    frame = FALSE
  ) +
  tm_borders(alpha = 0.5)
## Warning: The shape mpsz_agemale2018[mpsz_agemale2018$REGION_N == "CENTRAL
## REGION", is invalid. See sf::st_is_valid

In this dataset the coordinates are given explicitly. It has no spatial properties of its own; it only has columns holding the x and y coordinates. Hence we use the sf package to convert it into a spatial object.
# Read the outlet table; the coordinates arrive as plain numeric columns.
sgpools <- read_csv(file = "data/aspatial/SGPools_svy21.csv")
## Parsed with column specification:
## cols(
## NAME = col_character(),
## ADDRESS = col_character(),
## POSTCODE = col_double(),
## XCOORD = col_double(),
## YCOORD = col_double(),
## `OUTLET TYPE` = col_character(),
## `Gp1Gp2 Winnings` = col_double()
## )
# Inspect the imported tibble. Wrapping it in list() only adds a [[1]] level
# to the printed output, so print the tibble itself.
print(sgpools)
## [[1]]
## # A tibble: 306 x 7
## NAME ADDRESS POSTCODE XCOORD YCOORD `OUTLET TYPE` `Gp1Gp2 Winning~
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Livewire~ 2 Bayfront A~ 18972 30842. 29599. Branch 5
## 2 Livewire~ 26 Sentosa G~ 98138 26704. 26526. Branch 11
## 3 SportsBu~ Lotus Lounge~ 738078 20118. 44888. Branch 0
## 4 SportsBu~ 1 Selegie Rd~ 188306 29777. 31382. Branch 44
## 5 Prime Se~ Blk 542B Ser~ 552542 32239. 39519. Branch 0
## 6 Singapor~ 1A Woodlands~ 731001 21012. 46987. Branch 3
## 7 Singapor~ Blk 64 Circu~ 370064 33990. 34356. Branch 17
## 8 Singapor~ Blk 88 Circu~ 370088 33847. 33976. Branch 16
## 9 Singapor~ Blk 308 Anch~ 540308 33910. 41275. Branch 21
## 10 Singapor~ Blk 202 Ang ~ 560202 29246. 38943. Branch 25
## # ... with 296 more rows
# Convert the plain tibble into an sf object. The x and y columns must be
# named explicitly (x first, then y), and crs = 3414 states the EPSG code of
# those coordinates. The two columns are combined into a single geometry
# column, which is what lets tmap recognise and plot the points.
sgpools_sf <- st_as_sf(
  sgpools,
  coords = c("XCOORD", "YCOORD"),
  crs = 3414
)
Visualizing Singapore Pools outlets
# Switch tmap to interactive viewing for the maps that follow.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Plot every outlet as a fixed-size red bubble with a thin black outline.
tm_shape(shp = sgpools_sf) +
  tm_bubbles(
    size = 1,
    col = "red",
    border.col = "black",
    border.lwd = 1
  )