Import packages
packages = c('rgdal', 'spdep', 'tmap', 'tidyverse', 'sf', 'sp', 'rgeos', 'ggplot2', 'plotly')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
## Loading required package: rgdal
## Loading required package: sp
## rgdal: version: 1.4-6, (SVN revision 841)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/Jenny/Documents/R/win-library/3.6/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/Jenny/Documents/R/win-library/3.6/rgdal/proj
## Linking to sp version: 1.3-1
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
## Loading required package: tmap
## Loading required package: tidyverse
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: rgeos
## rgeos version: 0.5-2, (SVN revision 621)
## GEOS runtime version: 3.6.1-CAPI-1.10.1
## Linking to sp version: 1.3-1
## Polygon checking: TRUE
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Import data
1. Residential population data
popagesex <- read_csv("data/aspatial/respopagesextod2011to2019.csv")
## Parsed with column specification:
## cols(
## PA = col_character(),
## SZ = col_character(),
## AG = col_character(),
## Sex = col_character(),
## TOD = col_character(),
## Pop = col_double(),
## Time = col_double()
## )
2. Planning sub-zone level
sf_mpsz <- st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL")
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `C:\Users\Jenny\Documents\SMU Documents\YEAR 3\Year 3 Semester 2\IS415 - Geospatial Analytics and Applications\Take-home exercises\IS415_Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
3. Passenger volume by busstop
passengervolume <- read_csv("data/aspatial/passenger volume by busstop.csv")
## Parsed with column specification:
## cols(
## YEAR_MONTH = col_character(),
## DAY_TYPE = col_character(),
## TIME_PER_HOUR = col_double(),
## PT_TYPE = col_character(),
## PT_CODE = col_character(),
## TOTAL_TAP_IN_VOLUME = col_double(),
## TOTAL_TAP_OUT_VOLUME = col_double()
## )
4. Location of bus stop
sf_busstop <- st_read(dsn = "data/geospatial", layer = "BusStop")
## Reading layer `BusStop' from data source `C:\Users\Jenny\Documents\SMU Documents\YEAR 3\Year 3 Semester 2\IS415 - Geospatial Analytics and Applications\Take-home exercises\IS415_Take-home_Ex01\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 5040 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
Assigning EPSG3414
1. sf_mpsz
sf_mpsz3414 <- st_set_crs(sf_mpsz, 3414)
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform
## for that
2. sf_busstop
sf_busstop3414 <- st_set_crs(sf_busstop, 3414)
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform
## for that
Data Wrangling
1. popagesex
popagesex_2019 <- popagesex %>%
filter(Time == 2019) %>%
group_by(SZ) %>%
summarise(Pop=sum(Pop)) %>%
mutate_at(.vars = vars(SZ), .funs = funs(toupper))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
2. passengervolume
passenger_volume <- passengervolume %>%
group_by(PT_CODE, DAY_TYPE) %>%
summarise(TOTAL_TAP_IN_VOLUME=sum(TOTAL_TAP_IN_VOLUME), TOTAL_TAP_OUT_VOLUME=sum(TOTAL_TAP_OUT_VOLUME))
3. sf_mpsz3414
filtered_sfmpsz3414 <- sf_mpsz3414 %>%
select(`SUBZONE_N`, `geometry`)
Joining df
1. filtered_sfmpsz3414 & popsexage2019
mpsz_popsexage <- left_join(filtered_sfmpsz3414, popagesex_2019, by = c("SUBZONE_N" = "SZ"))
## Warning: Column `SUBZONE_N`/`SZ` joining factor and character vector,
## coercing into character vector
2. sf_busstop3414 & passenger_volume
passenger_busstop <- left_join(sf_busstop3414, passenger_volume, by=c("BUS_STOP_N" = "PT_CODE"))
## Warning: Column `BUS_STOP_N`/`PT_CODE` joining factor and character vector,
## coercing into character vector
Convert Foreign Object To An Sf Object
sf_passenger_busstop <- st_as_sf(passenger_busstop)
Joining spacial objects
1. Total tap in volume
mpszpop_busstop_tapin <- st_join(mpsz_popsexage, sf_passenger_busstop, join=st_intersects) %>%
group_by(SUBZONE_N, Pop) %>%
drop_na(TOTAL_TAP_IN_VOLUME) %>%
filter(Pop > 0) %>%
summarise(TOTAL_TAP_IN_VOLUME = sum(TOTAL_TAP_IN_VOLUME))
2. Total tap out volume
mpszpop_busstop_tapout <- st_join(mpsz_popsexage, sf_passenger_busstop, join=st_intersects) %>%
group_by(SUBZONE_N, Pop) %>%
drop_na(TOTAL_TAP_OUT_VOLUME) %>%
filter(Pop > 0) %>%
summarise(TOTAL_TAP_OUT_VOLUME = sum(TOTAL_TAP_OUT_VOLUME))
Part 1: Simple Linear Regression
Calibrating a simple linear regression to reveal the relation between public bus commuters’ flows (i.e. tap-in or tap-out) data and residential population at the planning sub-zone level.
1. Residents population vs tap in volume
tap_in_volume <- lm(TOTAL_TAP_IN_VOLUME ~ Pop, data = mpszpop_busstop_tapin)
summary(tap_in_volume)
##
## Call:
## lm(formula = TOTAL_TAP_IN_VOLUME ~ Pop, data = mpszpop_busstop_tapin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -865253 -145751 -67765 66360 1299429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.195e+05 2.739e+04 4.364 1.93e-05 ***
## Pop 1.937e+01 1.062e+00 18.235 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 308300 on 230 degrees of freedom
## Multiple R-squared: 0.5911, Adjusted R-squared: 0.5893
## F-statistic: 332.5 on 1 and 230 DF, p-value: < 2.2e-16
2. Residents population vs tap out volume
tap_out_volume <- lm(Pop ~ TOTAL_TAP_OUT_VOLUME, data = mpszpop_busstop_tapout)
summary(tap_out_volume)
##
## Call:
## lm(formula = Pop ~ TOTAL_TAP_OUT_VOLUME, data = mpszpop_busstop_tapout)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49356 -5635 -980 4971 44643
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.283e+03 1.107e+03 2.967 0.00333 **
## TOTAL_TAP_OUT_VOLUME 3.092e-02 1.680e-03 18.399 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12170 on 230 degrees of freedom
## Multiple R-squared: 0.5954, Adjusted R-squared: 0.5937
## F-statistic: 338.5 on 1 and 230 DF, p-value: < 2.2e-16
Plotting the Simple Linear Regression
1. Residents population vs tap in volume
r1 <- ggplot(mpszpop_busstop_tapin, aes(x = log(Pop), y = log(TOTAL_TAP_IN_VOLUME))) +
geom_point() +
stat_smooth(method = "lm", col = "red")
ggplotly(r1)
d_tapin <- mpszpop_busstop_tapin
fit_tapin <- lm(log(TOTAL_TAP_IN_VOLUME) ~ log(Pop), data = d_tapin) # fit the model
d_tapin$predicted <- predict(fit_tapin) # Save the predicted values
d_tapin$residuals <- residuals(fit_tapin) # Save the residual values
ggplot(d_tapin, aes(x = log(Pop), y = log(TOTAL_TAP_IN_VOLUME))) +
geom_smooth(method = "lm", se = FALSE, color = "lightgrey") + # regression line
geom_segment(aes(xend = log(Pop), yend = predicted), alpha = .2) + # draw line from point to line
geom_point(aes(color = abs(residuals), size = abs(residuals))) + # size of the points
scale_color_continuous(low = "green", high = "red") + # colour of the points mapped to residual size - green smaller, red larger
guides(color = FALSE, size = FALSE) + # Size legend removed
geom_point(aes(y = predicted), shape = 1) +
theme_bw()

summary(fit_tapin)
##
## Call:
## lm(formula = log(TOTAL_TAP_IN_VOLUME) ~ log(Pop), data = d_tapin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3424 -0.4376 -0.0068 0.5130 2.5164
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.10219 0.30690 29.66 <2e-16 ***
## log(Pop) 0.38629 0.03389 11.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8777 on 230 degrees of freedom
## Multiple R-squared: 0.361, Adjusted R-squared: 0.3582
## F-statistic: 129.9 on 1 and 230 DF, p-value: < 2.2e-16
2. Residents population vs tap out volume
r2 <- ggplot(mpszpop_busstop_tapout, aes(x = log(Pop), y = log(TOTAL_TAP_OUT_VOLUME))) +
geom_point() +
stat_smooth(method = "lm", col = "red")
ggplotly(r2)
d_tapout <- mpszpop_busstop_tapout
fit_tapout <- lm(log(TOTAL_TAP_OUT_VOLUME) ~ log(Pop), data = d_tapout) # fit the model
d_tapout$predicted <- predict(fit_tapout) # Save the predicted values
d_tapout$residuals <- residuals(fit_tapout) # Save the residual values
ggplot(d_tapout, aes(x = log(Pop), y = log(TOTAL_TAP_OUT_VOLUME))) +
geom_smooth(method = "lm", se = FALSE, color = "lightgrey") + # regression line
geom_segment(aes(xend = log(Pop), yend = predicted), alpha = .2) + # draw line from point to line
geom_point(aes(color = abs(residuals), size = abs(residuals))) + # size of the points
scale_color_continuous(low = "green", high = "red") + # colour of the points mapped to residual size - green smaller, red larger
guides(color = FALSE, size = FALSE) + # Size legend removed
geom_point(aes(y = predicted), shape = 1) +
theme_bw()

summary(fit_tapout)
##
## Call:
## lm(formula = log(TOTAL_TAP_OUT_VOLUME) ~ log(Pop), data = d_tapout)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.39339 -0.46749 -0.02066 0.52707 2.42504
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.10195 0.28975 31.41 <2e-16 ***
## log(Pop) 0.38788 0.03199 12.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8286 on 230 degrees of freedom
## Multiple R-squared: 0.3899, Adjusted R-squared: 0.3873
## F-statistic: 147 on 1 and 230 DF, p-value: < 2.2e-16
Residuals vs Fitted Plot
Residual plots are used to look for underlying patterns in the residuals that may mean that the model has a problem.
1. Residents population vs tap in volume
plot(fit_tapin, which=1, col=c("blue")) # Residuals vs Fitted Plot

2. Residents population vs tap out volume
plot(fit_tapout, which=1, col=c("blue")) # Residuals vs Fitted Plot

Analysis of above Simple Linear Regression
1. The linear regression for both Total tap in volume vs Population and Total tap out volume vs Population is similar
2. The linearity assumption is violated
3. The relationship is weakly linear and it follows the shape of a parabolic curve
Part 2: Spatial Autocorrelation Analysis
Performing spatial autocorrelation analysis on the residual of the regression model to test if the model conforms to the randomization assumption.
Regression model used: Total tap in volume vs Population
mpsz_tapin <- left_join(mpsz_popsexage, d_tapin, by = c("SUBZONE_N" = "SUBZONE_N", "Pop" = "Pop")) %>%
drop_na(TOTAL_TAP_IN_VOLUME)
Choropleth Map
qtm(mpsz_tapin, "TOTAL_TAP_IN_VOLUME")
## Warning: The shape mpsz_tapin is invalid. See sf::st_is_valid

Statistical Test
1. Null hypotheses: Total tap in volume is randomly distributed among the population in Singapore
2. Alternative hypotheses: Total tap in volume is not randomly distributed among the population in Singapore
Confident interval: 95%
Creating contiguity based neighbours - Queens
wm_q <- poly2nb(mpsz_tapin, queen=TRUE)
summary(wm_q)
## Neighbour list object:
## Number of regions: 232
## Number of nonzero links: 1182
## Percentage nonzero weights: 2.196046
## Average number of links: 5.094828
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9
## 4 10 26 35 69 41 31 12 4
## 4 least connected regions:
## 20 271 297 320 with 1 link
## 4 most connected regions:
## 95 101 169 181 with 9 links
The summary report above shows that:
1. There are 232 regions in Singapore. The most connected area unit has 9 neighbours. There 4 area units with only one neighbour.
2. The average no.of neighbours is 5
Plotting Queen contiguity based neighours maps
plot(mpsz_tapin$geometry, border="lightgrey")
plot(wm_q, sf::st_centroid(mpsz_tapin$geometry), pch = 19, cex = 0.6, add = TRUE, col= "red")

Computing distance based neighbours
Determine the cut-off distance
coords <- sf::st_centroid(mpsz_tapin$geometry)
k1 <- knn2nb(knearneigh(coords))
k1dists <- unlist(nbdists(k1, coords, longlat = TRUE))
## Warning in nbdists(k1, coords, longlat = TRUE): dnearneigh: longlat
## argument overrides object
summary(k1dists)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 182.5 616.6 879.3 914.3 1096.8 4597.9
Analysis of results above:
1. The cut-off distance is d = 4597.9
2. By using this as the upper threshold, it gives a certainty that all units will have at least one neighbour
Computing fixed distance weight matrix
wm_d62 <- dnearneigh(coords, 0, 4598, longlat = TRUE)
## Warning in dnearneigh(coords, 0, 4598, longlat = TRUE): dnearneigh: longlat
## argument overrides object
wm_d62
## Neighbour list object:
## Number of regions: 232
## Number of nonzero links: 8736
## Percentage nonzero weights: 16.23068
## Average number of links: 37.65517
Computing adaptive distance weight matrix
wm_knn6 <- knn2nb(knearneigh(coords, k=6))
wm_knn6
## Neighbour list object:
## Number of regions: 232
## Number of nonzero links: 1392
## Percentage nonzero weights: 2.586207
## Average number of links: 6
## Non-symmetric neighbours list
Analysis of results above:
1. Average number of links is 6 as specified above - k=6. Each subzone only has 6 neighbours
Plotting distance based neighbours
plot(mpsz_tapin$geometry, border="lightgrey")
plot(wm_knn6, sf::st_centroid(mpsz_tapin$geometry), pch = 19, cex = 0.6, add = TRUE, col= "red")

Row-standardised weights matrix
rswm_q <- nb2listw(wm_q, style="W", zero.policy = TRUE)
rswm_q
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 232
## Number of nonzero links: 1182
## Percentage nonzero weights: 2.196046
## Average number of links: 5.094828
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 232 53824 232 99.897 957.3532
Weights matrix for queens method
rswm_q$weights[1]
## [[1]]
## [1] 0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125
Measure of Global Spatial Autocorrelation
Computing Moran’s I
1. Tap in volume
lm.morantest(tap_in_volume, nb2listw(wm_q, style="W"))
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = TOTAL_TAP_IN_VOLUME ~ Pop, data =
## mpszpop_busstop_tapin)
## weights: nb2listw(wm_q, style = "W")
##
## Moran I statistic standard deviate = -0.20721, p-value = 0.5821
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## -0.013180651 -0.004348385 0.001816843
Analysis of results above:
1. p-value = 0.5821
2. alpha = 0.95 (confidence interval)
3. Since 0.5821 is > 0.05, there is no statistical evidence to reject the null hypothesis
4. Moran’s I is not useful to determine the pattern)
5. We can draw the conclusion that the spatial pattern resemble a random distribution
Part 3: Localized Geospatial Statistics Analysis
Performing localized geospatial statistics analysis by using commuters’ tap-in and tap-out data to identify geographical clustering.
1. Total tap in volume
Computing local Moran’s I
fips <- order(mpsz_tapin$SUBZONE_N)
localMI <- localmoran(mpsz_tapin$Pop, rswm_q)
head(localMI)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 2 0.35363199 -0.004329004 0.1167109 1.0478037 0.1473645
## 3 0.69912394 -0.004329004 0.3179220 1.2475982 0.1060891
## 4 0.09266878 -0.004329004 0.1569532 0.2448369 0.4032914
## 5 0.14954331 -0.004329004 0.1891469 0.3538025 0.3617435
## 6 0.09818793 -0.004329004 0.1339576 0.2800992 0.3897007
## 7 0.06235776 -0.004329004 0.1339576 0.1822032 0.4277116
Combine it with subzone to see the details
#printCoefmat(data.frame(localMI[fips,], row.names=mpsz_tapin$SUBZONE_N[fips]), check.names=FALSE)
Plotting Moran scatterplot
nci <- moran.plot(mpsz_tapin$Pop, rswm_q, labels=as.character(mpsz_tapin$SUBZONE_N), xlab="Population", ylab="Total Tap In Volume")

Mapping the local Moran’s I
tapin.localMI <- cbind(mpsz_tapin,localMI)
Mapping both local Moran’s I values and p-values
localMI.map <- tm_shape(tapin.localMI) +
tm_fill(col = "Ii",
style = "pretty",
title = "local moran statistics") +
tm_borders(alpha = 0.5)
pvalue.map <- tm_shape(tapin.localMI) +
tm_fill(col = "Pr.z...0.",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
palette="-Blues",
title = "local Moran's I p-values") +
tm_borders(alpha = 0.5)
tmap_arrange(localMI.map, pvalue.map, asp=1, ncol=2)
## Warning: The shape tapin.localMI is invalid. See sf::st_is_valid
## Variable "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Warning: The shape tapin.localMI is invalid. See sf::st_is_valid

Analysis of results above:
1. Based on the map, we can see that some outliers are insignificant. However, using the eyeballing method might not be accurate. Hence, We use LISA to check on the outliers
Creating a LISA Cluster Map
Plotting Moran scatterplot
mpsz_tapin$Population <- scale(mpsz_tapin$Pop) %>% as.vector
nci <- moran.plot(mpsz_tapin$Population, rswm_q, labels=as.character(mpsz_tapin$SUBZONE_N), xlab="Population", ylab="Total tap in volume")

Preparing LISA cluster map
quadrant <- vector(mode="numeric",length=nrow(localMI))
DV <- mpsz_tapin$Pop - mean(mpsz_tapin$Pop)
C_mI <- localMI[,1] - mean(localMI[,1])
signif <- 0.05
quadrant[DV >0 & C_mI>0] <- 4
quadrant[DV <0 & C_mI<0] <- 1
quadrant[DV <0 & C_mI>0] <- 2
quadrant[DV >0 & C_mI<0] <- 3
quadrant[localMI[,5]>signif] <- 0
Building LISA map
tapin.localMI$quadrant <- quadrant
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")
tm_shape(tapin.localMI) +
tm_fill(col = "quadrant", style = "cat", palette = colors[c(sort(unique(quadrant)))+1], labels = clusters[c(sort(unique(quadrant)))+1], popup.vars = c("Postal.Code")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5)
## Warning: The shape tapin.localMI is invalid. See sf::st_is_valid

Hot Spot and Cold Spot Area Analysis
Creating adaptive proximity matrix
coords <- sf::st_centroid(mpsz_tapin$geometry)
knb <- knn2nb(knearneigh(coords, k=8, longlat = TRUE), row.names=row.names(mpsz_tapin$Pop))
## Warning in knearneigh(coords, k = 8, longlat = TRUE): dnearneigh: longlat
## argument overrides object
knb_lw <- nb2listw(knb, style = 'B')
summary(knb_lw)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 232
## Number of nonzero links: 1856
## Percentage nonzero weights: 3.448276
## Average number of links: 8
## Non-symmetric neighbours list
## Link number distribution:
##
## 8
## 232
## 232 least connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 with 8 links
## 232 most connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 with 8 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 232 53824 1856 3364 60812
Plot the centroids
plot(mpsz_tapin$geometry, border="lightgrey")
plot(knb, coords, pch = 19, cex = 0.6, add = TRUE, col = "red")

Computing Gi statistics
Gi statistics using adaptive distance
fips <- order(mpsz_tapin$TOTAL_TAP_IN_VOLUME)
gi_in.adaptive <- localG(mpsz_tapin$TOTAL_TAP_IN_VOLUME, knb_lw)
tap_in_data.gi <- cbind(mpsz_tapin, as.matrix(gi_in.adaptive))
names(tap_in_data.gi)[5] <- "gstat_adaptive_tap_in"
Visualising local Gi
Mapping Gi values with adaptive distance weights
tm_shape(tap_in_data.gi) +
tm_fill(col = "gstat_adaptive_tap_in",
style = "pretty",
palette = "-RdBu",
title = "local Gi") +
tm_borders(alpha = 0.5)
## Warning: The shape tap_in_data.gi is invalid. See sf::st_is_valid
## Variable "gstat_adaptive_tap_in" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

2. Total tap out volume
mpsz_tapout <- left_join(mpsz_popsexage, d_tapout, by = c("SUBZONE_N" = "SUBZONE_N", "Pop" = "Pop")) %>%
drop_na(TOTAL_TAP_OUT_VOLUME)
wm_q2 <- poly2nb(mpsz_tapout, queen=TRUE)
rswm_q2 <- nb2listw(wm_q2, style="W", zero.policy = TRUE)
Computing local Moran’s I
fips2 <- order(mpsz_tapout$SUBZONE_N)
localMI2 <- localmoran(mpsz_tapout$Pop, rswm_q2)
head(localMI2)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 2 0.35363199 -0.004329004 0.1167109 1.0478037 0.1473645
## 3 0.69912394 -0.004329004 0.3179220 1.2475982 0.1060891
## 4 0.09266878 -0.004329004 0.1569532 0.2448369 0.4032914
## 5 0.14954331 -0.004329004 0.1891469 0.3538025 0.3617435
## 6 0.09818793 -0.004329004 0.1339576 0.2800992 0.3897007
## 7 0.06235776 -0.004329004 0.1339576 0.1822032 0.4277116
Plotting Moran scatterplot
nci2 <- moran.plot(mpsz_tapout$Pop, rswm_q2, labels=as.character(mpsz_tapout$SUBZONE_N), xlab="Population", ylab="Total Tap Out Volume")

Mapping the local Moran’s I
tapout.localMI2 <- cbind(mpsz_tapout,localMI2)
Mapping both local Moran’s I values and p-values
localMI.map2 <- tm_shape(tapout.localMI2) +
tm_fill(col = "Ii",
style = "pretty",
title = "local moran statistics") +
tm_borders(alpha = 0.5)
pvalue.map2 <- tm_shape(tapout.localMI2) +
tm_fill(col = "Pr.z...0.",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
palette="-Blues",
title = "local Moran's I p-values") +
tm_borders(alpha = 0.5)
tmap_arrange(localMI.map2, pvalue.map2, asp=1, ncol=2)
## Warning: The shape tapout.localMI2 is invalid. See sf::st_is_valid
## Variable "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Warning: The shape tapout.localMI2 is invalid. See sf::st_is_valid

Creating a LISA Cluster Map
Plotting Moran scatterplot
mpsz_tapout$Population <- scale(mpsz_tapout$Pop) %>% as.vector
nci2 <- moran.plot(mpsz_tapout$Population, rswm_q, labels=as.character(mpsz_tapout$SUBZONE_N), xlab="Population", ylab="Total tap in volume")

Preparing LISA cluster map
quadrant2 <- vector(mode="numeric",length=nrow(localMI2))
DV2 <- mpsz_tapout$Pop - mean(mpsz_tapout$Pop)
C_mI2 <- localMI2[,1] - mean(localMI2[,1])
signif <- 0.05
quadrant2[DV2 >0 & C_mI2>0] <- 4
quadrant2[DV2 <0 & C_mI2<0] <- 1
quadrant2[DV2 <0 & C_mI2>0] <- 2
quadrant2[DV2 >0 & C_mI2<0] <- 3
quadrant2[localMI2[,5]>signif] <- 0
Building LISA map
tapout.localMI2$quadrant2 <- quadrant2
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")
tm_shape(tapout.localMI2) +
tm_fill(col = "quadrant2", style = "cat", palette = colors[c(sort(unique(quadrant2)))+1], labels = clusters[c(sort(unique(quadrant2)))+1], popup.vars = c("Postal.Code")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5)
## Warning: The shape tapout.localMI2 is invalid. See sf::st_is_valid

Hot Spot and Cold Spot Area Analysis
Creating adaptive proximity matrix
coords2 <- sf::st_centroid(mpsz_tapout$geometry)
knb2 <- knn2nb(knearneigh(coords2, k=8, longlat = TRUE), row.names=row.names(mpsz_tapout$Pop))
## Warning in knearneigh(coords2, k = 8, longlat = TRUE): dnearneigh: longlat
## argument overrides object
knb_lw2 <- nb2listw(knb2, style = 'B')
summary(knb_lw2)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 232
## Number of nonzero links: 1856
## Percentage nonzero weights: 3.448276
## Average number of links: 8
## Non-symmetric neighbours list
## Link number distribution:
##
## 8
## 232
## 232 least connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 with 8 links
## 232 most connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 with 8 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 232 53824 1856 3364 60812
Plot the centroids
plot(mpsz_tapout$geometry, border="lightgrey")
plot(knb2, coords, pch = 19, cex = 0.6, add = TRUE, col = "red")

Computing Gi statistics
Gi statistics using adaptive distance
fips2 <- order(mpsz_tapout$TOTAL_TAP_OUT_VOLUME)
gi_out.adaptive <- localG(mpsz_tapout$TOTAL_TAP_OUT_VOLUME, knb_lw2)
tap_out_data.gi <- cbind(mpsz_tapout, as.matrix(gi_out.adaptive))
names(tap_out_data.gi)[5] <- "gstat_adaptive_out"
Visualising local Gi
Mapping Gi values with adaptive distance weights
tm_shape(tap_out_data.gi) +
tm_fill(col = "gstat_adaptive_out",
style = "pretty",
palette = "-RdBu",
title = "local Gi") +
tm_borders(alpha = 0.5)
## Warning: The shape tap_out_data.gi is invalid. See sf::st_is_valid
## Variable "gstat_adaptive_out" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
