packages = c('rgdal', 'spdep', 'tmap', 'tidyverse', 'sf', 'ggpubr')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
## Loading required package: rgdal
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/proj
## Linking to sp version: 1.3-2
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
## Loading required package: tmap
## Loading required package: tidyverse
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: ggpubr
tapData <- read_csv("data/aspatial/passenger volume by busstop.csv")
## Parsed with column specification:
## cols(
## YEAR_MONTH = col_character(),
## DAY_TYPE = col_character(),
## TIME_PER_HOUR = col_double(),
## PT_TYPE = col_character(),
## PT_CODE = col_character(),
## TOTAL_TAP_IN_VOLUME = col_double(),
## TOTAL_TAP_OUT_VOLUME = col_double()
## )
#singapore <- st_read(dsn = "data/geospatial/master-plan-2014-subzone-boundary-no-sea/master-plan-2014-subzone-boundary-no-sea-shp", layer = "MP14_SUBZONE_NO_SEA_PL")
singapore <- st_read(dsn = "data/geospatial/d2", layer = "MP14_SUBZONE_WEB_PL")
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `/Users/Amey/Desktop/Y2S2/IS415/assign1/data/geospatial/d2' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
population <- read_csv("data/aspatial/respopagesextod2011to2019.csv")
## Parsed with column specification:
## cols(
## PA = col_character(),
## SZ = col_character(),
## AG = col_character(),
## Sex = col_character(),
## TOD = col_character(),
## Pop = col_double(),
## Time = col_double()
## )
busStopLocation <- st_read(dsn="data/geospatial/BusStopLocation_Jan2020", layer= "BusStop")
## Reading layer `BusStop' from data source `/Users/Amey/Desktop/Y2S2/IS415/assign1/data/geospatial/BusStopLocation_Jan2020' using driver `ESRI Shapefile'
## Simple feature collection with 5040 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
population <-population %>%
filter(Time==2019) %>%
group_by(SZ) %>%
summarise(value=sum(Pop)) %>%
rename( "SUBZONE_N"=SZ)
population$SUBZONE_N <- toupper(population$SUBZONE_N)
singapore <- left_join(singapore, population, by="SUBZONE_N")
tapData <- tapData %>%
select(PT_CODE, TOTAL_TAP_IN_VOLUME, TOTAL_TAP_OUT_VOLUME)%>%
group_by(PT_CODE) %>%
summarise(TAP_IN = sum(TOTAL_TAP_IN_VOLUME),
TAP_OUT = sum(TOTAL_TAP_OUT_VOLUME))%>%
rename("BUS_STOP_N"=PT_CODE)
busStopLocation <- st_set_crs(busStopLocation, 3414)
singapore <- st_set_crs(singapore, 3414)
st_transform(busStopLocation,3414)
## Simple feature collection with 5040 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
## CRS: EPSG:3414
## First 10 features:
## BUS_STOP_N BUS_ROOF_N LOC_DESC geometry
## 1 78221 B06 <NA> POINT (42227.96 39563.16)
## 2 63359 B01 HOUGANG SWIM CPLX POINT (34065.75 39047.46)
## 3 64141 B13 AFT JLN TELAWI POINT (36335.3 38525.74)
## 4 83139 B07 AFT JOO CHIAT PL POINT (36530.26 32981.18)
## 5 55231 B02 OPP SBST EAST DISTRICT POINT (29669.93 40841.51)
## 6 55351 B03 OPP FUDU WALK P/G POINT (28404.77 41300.92)
## 7 92089 B10 CHIJ KATONG CON POINT (37378.19 32166.81)
## 8 80271 B06 OPP BLK 2 POINT (33524.15 31938.86)
## 9 82059 B02 ONE AMBER POINT (35142.59 31500.49)
## 10 97089 B02 OPP SELARANG PK DRUG REH. POINT (44329.52 39204.74)
st_transform(singapore, 3414)
## Simple feature collection with 323 features and 16 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## CRS: EPSG:3414
## First 10 features:
## OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND PLN_AREA_N
## 1 1 1 MARINA SOUTH MSSZ01 Y MARINA SOUTH
## 2 2 1 PEARL'S HILL OTSZ01 Y OUTRAM
## 3 3 3 BOAT QUAY SRSZ03 Y SINGAPORE RIVER
## 4 4 8 HENDERSON HILL BMSZ08 N BUKIT MERAH
## 5 5 3 REDHILL BMSZ03 N BUKIT MERAH
## 6 6 7 ALEXANDRA HILL BMSZ07 N BUKIT MERAH
## 7 7 9 BUKIT HO SWEE BMSZ09 N BUKIT MERAH
## 8 8 2 CLARKE QUAY SRSZ02 Y SINGAPORE RIVER
## 9 9 13 PASIR PANJANG 1 QTSZ13 N QUEENSTOWN
## 10 10 7 QUEENSWAY QTSZ07 N QUEENSTOWN
## PLN_AREA_C REGION_N REGION_C INC_CRC FMEL_UPD_D X_ADDR
## 1 MS CENTRAL REGION CR 5ED7EB253F99252E 2014-12-05 31595.84
## 2 OT CENTRAL REGION CR 8C7149B9EB32EEFC 2014-12-05 28679.06
## 3 SR CENTRAL REGION CR C35FEFF02B13E0E5 2014-12-05 29654.96
## 4 BM CENTRAL REGION CR 3775D82C5DDBEFBD 2014-12-05 26782.83
## 5 BM CENTRAL REGION CR 85D9ABEF0A40678F 2014-12-05 26201.96
## 6 BM CENTRAL REGION CR 9D286521EF5E3B59 2014-12-05 25358.82
## 7 BM CENTRAL REGION CR 7839A8577144EFE2 2014-12-05 27680.06
## 8 SR CENTRAL REGION CR 48661DC0FBA09F7A 2014-12-05 29253.21
## 9 QT CENTRAL REGION CR 1F721290C421BFAB 2014-12-05 22077.34
## 10 QT CENTRAL REGION CR 3580D2AFFBEE914C 2014-12-05 24168.31
## Y_ADDR SHAPE_Leng SHAPE_Area value geometry
## 1 29220.19 5267.381 1630379.3 0 MULTIPOLYGON (((31495.56 30...
## 2 29782.05 3506.107 559816.2 6630 MULTIPOLYGON (((29092.28 30...
## 3 29974.66 1740.926 160807.5 70 MULTIPOLYGON (((29932.33 29...
## 4 29933.77 3313.625 595428.9 13380 MULTIPOLYGON (((27131.28 30...
## 5 30005.70 2825.594 387429.4 10730 MULTIPOLYGON (((26451.03 30...
## 6 29991.38 4428.913 1030378.8 13780 MULTIPOLYGON (((25899.7 297...
## 7 30230.86 3275.312 551732.0 14780 MULTIPOLYGON (((27746.95 30...
## 8 30222.86 2208.619 290184.7 60 MULTIPOLYGON (((29351.26 29...
## 9 29893.78 6571.323 1084792.3 4430 MULTIPOLYGON (((20996.49 30...
## 10 30104.18 3454.239 631644.3 290 MULTIPOLYGON (((24472.11 29...
busStops <- left_join(tapData,busStopLocation)
## Joining, by = "BUS_STOP_N"
busStops <- busStops %>%
select(BUS_STOP_N, TAP_IN, TAP_OUT, geometry)
subZone <- singapore %>%
select(OBJECTID, SUBZONE_N, value, geometry)%>%
rename("Area"=OBJECTID)
xy <- st_coordinates(singapore)
x <- xy[,1]
y <- xy[,2]
pol <- xy[,5]
poly <- data.frame(pol,x,y)
points <- st_coordinates(busStopLocation)
x <- points[,1]
y <- points[,2]
object <- busStopLocation$BUS_STOP_N
points <- data.frame(object,x,y)
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
x <- split(poly$x, poly$pol)
y <- split(poly$y, poly$pol)
todo <- 1:nrow(points)
Area <- rep.int("", nrow(points))
pol <- names(x)
# loop through polygons
for (i in 1:length(x)) {
# the vertices of i-th polygon
bnd <- cbind(x[[i]], y[[i]])
# points to allocate
xy <- with(points, cbind(x[todo], y[todo]))
inbnd <- in.out(bnd, xy)
# allocation
Area[todo[inbnd]] <- pol[i]
# update 'todo'
todo <- todo[!inbnd]
}
points$Area <- Area
points2 <- points %>%
select(object,Area) %>%
rename("BUS_STOP_N"=object)
table <- left_join(busStops,points2)
## Joining, by = "BUS_STOP_N"
table <- table %>%
select(BUS_STOP_N, TAP_IN, TAP_OUT, Area)
table$Area <- as.numeric(table$Area)
table <- left_join(subZone,table)
## Joining, by = "Area"
data <- table %>%
select( SUBZONE_N,value,TAP_IN,TAP_OUT) %>%
group_by(SUBZONE_N,value) %>%
summarise(TAP_IN=sum(TAP_IN),
TAP_OUT=sum(TAP_OUT)) %>%
drop_na()
stats <- lm(TAP_IN~value,data)
summary(stats)
##
## Call:
## lm(formula = TAP_IN ~ value, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -865527 -124146 -64237 39142 1949703
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.254e+05 2.110e+04 5.944 7.64e-09 ***
## value 1.926e+01 9.383e-01 20.524 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 298200 on 303 degrees of freedom
## Multiple R-squared: 0.5816, Adjusted R-squared: 0.5802
## F-statistic: 421.2 on 1 and 303 DF, p-value: < 2.2e-16
stats$coefficients
## (Intercept) value
## 125402.22897 19.25786
data$tapINres <- stats$residuals
plot(data$value, data$TAP_IN, main="Regression for Population on Tap In volumes in Buses",xlab="Population in Subzone",ylab="Tap In")
abline(reg = stats,col='red')
From the regression model, we can derive the following:
(1) The linear equation is y = 19.26x + 125402
(2) The slope of the regression line is 19.26, which signifies a positive relationship between population and tap in volumes in buses
(3) The intercept is 125402
(4) The R-squared value is 0.5816, which signifies that 58.16% of the variation in tap in data is due to the variation in population.
The following code will create a choropleth map in order to visualise the Tap In Residual Data among different subzones in Singapore.
data2 <- st_as_sf(data)
tm_shape(data2)+
tm_fill(col="tapINres",
style = "pretty",
palette = "-RdBu",
title = "Tap in Residual data")+
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha = 0.5)
## Variable(s) "tapINres" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
Negative residuals (Subzones in blue) imply over prediction of data whereas Positive residuals (Subzones in red) imply underprediction of data. Visually, spatial correlation for residuals is not evident but a more formal test needs to be conducted to confirm this. Therefore, global Moran’s test is performed on the residual data to validate if there is spatial correlation.
Firstly, weight matrix is created. The Queens weight matrix is created first in order to find out the average number of neighbours each subzone has and identify the subzones with lower number of connections.
wm_q <- poly2nb(data2, queen = TRUE)
summary(wm_q)
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1844
## Percentage nonzero weights: 1.982263
## Average number of links: 6.045902
## Link number distribution:
##
## 2 3 4 5 6 7 8 9 10 11 12 14 17
## 6 11 27 79 76 50 36 13 2 2 1 1 1
## 6 least connected regions:
## 2 41 132 170 228 275 with 2 links
## 1 most connected region:
## 39 with 17 links
plot(data2$geometry, border="lightgrey")
plot(wm_q, st_centroid(data2$geometry), add = TRUE, col= "red")
From the summary data of the Queen’s weighed matrix, each subzone has an average of 6 connections. However, there are 6 subzones with only 2 links. In order to make our weighed matrix uniform, an adaptive weighed matrix is being used with each subzone being assigned 7 neighbours.
coords <- st_centroid(data2$geometry)
wm_knn8 <- knn2nb(knearneigh(coords, k=7))
plot(data2$geometry, border="lightgrey")
plot(wm_knn8, st_centroid(data2$geometry), add = TRUE, col= "green")
The following code will standarise the weight for every subzone. As 7 neighbours are assigned to each subzone, the weight will therefore be 1/7 for each neighbour.
rswm_knn8 <- nb2listw(wm_knn8, style="W", zero.policy = TRUE)
Spatial lag values assign the average Tap In value of neighbours for each subzone.
tapINres.lag <- lag.listw(rswm_knn8, data2$tapINres)
data2$tapinLAG <- tapINres.lag
tmap_mode("plot")
## tmap mode set to plotting
a <- tm_shape(data2)+
tm_fill(col = "tapINres",
title = "Tap In Residuals")+
tm_layout(legend.outside = TRUE) +
tm_borders(alpha = 0.5)
b <- tm_shape(data2)+
tm_fill(col = "tapinLAG",
title = "Tap in Residual Lag")+
tm_layout(legend.outside = TRUE) +
tm_borders(alpha = 0.5)
tmap_arrange(a, b,asp=1, nrow = 1, sync = TRUE)
## Variable(s) "tapINres" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Some legend labels were too wide. These labels have been resized to 0.38, 0.64, 0.41, 0.37, 0.37. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
## Variable(s) "tapinLAG" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Some legend labels were too wide. These labels have been resized to 0.41, 0.41, 0.41, 0.64, 0.44, 0.44, 0.44. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
The null hypothesis is that the residuals obtained from the regression model of the relationship between population and bus tap in is randomised among different subzones in Singapore.
The alternative hypothesis is that the residuals obtained from the regression model of the relationship between population and bus tap in is not randomised among different subzones in Singapore.
The alpha (significance level) for this test is 0.05.
lm.morantest(stats, listw=rswm_knn8, zero.policy=TRUE, alternative = "greater",
spChk=NULL, resfun=weighted.residuals, naSubset=TRUE)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = TAP_IN ~ value, data = data)
## weights: rswm_knn8
##
## Moran I statistic standard deviate = -0.35694, p-value = 0.6394
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## -0.014424884 -0.004254316 0.000811893
The p-value is 0.724, which is above 0.05 and hence we do not reject the null hypothesis. However, to confirm our findings, we will run a Monte Carlo simulation 1000 times.
set.seed(1234)
bperm= moran.mc(data2$tapINres, listw=rswm_knn8, nsim=999, zero.policy = TRUE, na.action=na.omit)
bperm
##
## Monte-Carlo simulation of Moran I
##
## data: data2$tapINres
## weights: rswm_knn8
## number of simulations + 1: 1000
##
## statistic = -0.014425, observed rank = 371, p-value = 0.629
## alternative hypothesis: greater
hist(bperm$res, freq=TRUE, breaks=20, xlab="Simulated Moran's I", main = "Monte Carlo Simulation Findings")
abline(v=-0.023, col="red")
The Moran’s I in the Monte Carlo simulation is in the middle, validating our findings of an insignificant p-value from the Moran’s I test.
The p-value is 0.724, which is above 0.05 and hence we do not reject the null hypothesis. The moran’s I value is -0.0223 which is very close to 0, implying that the residual data obtained from the population-tapin regression model conforms to spatial randomness among different subzones in Singapore. From the results of the test, we can conclude that the errors in the regression model are independent of one another, and their values do not depend on the value of residuals at neighbouring locations, i.e. there is no spatial correlation in the residuals.
MS <- moran.plot(data2$tapINres, rswm_knn8, zero.policy = TRUE, spChk=FALSE, labels=as.character(data2$SUBZONE_N), xlab="residual", ylab="Spatially Lag residual")
Spatial Correlograms are plotted in order to identify the autocorrelation of spatial observations when we increase the distance (lag) between them.
MI_corr <- sp.correlogram(wm_knn8, data2$tapINres, order=6, method="I", style="W")
plot(MI_corr, main="Spatial Correlogram")
The findings from the above graph show that the Moran’s I value are all around 0 even when we increase the distance between them.
Now, the Tap In Data itself is analysed. The Data is first analysed globally through Global Moran’s I test in order to find if there is clustering and then analysed locally through local Moran’s I test in order to find signifcant clusters, outliers, cold spots and hot spots.
The null hypothesis is that the volume of Bus Tap In is randomised among different subzones in Singapore.
The alternative hypothesis is that the volume of Bus Tap In is not randomised among different subzones in Singapore.
The alpha (significance level) for this test is 0.05.
moran.test(data2$TAP_IN, listw=rswm_knn8, zero.policy = TRUE, na.action=na.omit,alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: data2$TAP_IN
## weights: rswm_knn8
##
## Moran I statistic standard deviate = 6.2505, p-value = 4.092e-10
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1714396388 -0.0032894737 0.0007814573
The p-value is 4.092e-10, which is below 0.05 and hence we reject the null hypothesis. However, to confirm our findings, we will run a Monte Carlo simulation 1000 times.
set.seed(1234)
bperm= moran.mc(data2$TAP_IN, listw=rswm_knn8, nsim=999, zero.policy = TRUE, na.action=na.omit )
bperm
##
## Monte-Carlo simulation of Moran I
##
## data: data2$TAP_IN
## weights: rswm_knn8
## number of simulations + 1: 1000
##
## statistic = 0.17144, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
hist(bperm$res, freq=TRUE, breaks=20, xlab="Simulated Moran's I", main="Findings from Monte Carlo Simulation")
abline(v=0.171, col="red")
The Moran’s I value is in the extreme right when compared with the Monte Carlo simulation i, validating our findings of an significant p-value from the Moran’s I test.
The p-value is 4.092e-10, which is below 0.05 and hence we reject the null hypothesis. Therefore, the volume of Bus Tap In is not randomised among different subzones in Singapore, implying a spatial relationship between them. Furthermore, the Moran’s I value is 0.171 which is greather than 0, implying that the Tap In data is positively clustered among different subzones in Singapore.
From the code below, Local Moran’s I test is performed locally through which I value and p-value is obtained for every subzone.
fips <- order(data2$SUBZONE_N)
localMI <- localmoran(data2$TAP_IN, rswm_knn8)
head(localMI)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 1 0.11306988 -0.003289474 0.1329659 0.3191032 0.37482413
## 2 -0.05217577 -0.003289474 0.1329659 -0.1340655 0.55332460
## 3 -0.06442490 -0.003289474 0.1329659 -0.1676574 0.56657360
## 4 0.07531885 -0.003289474 0.1329659 0.2155750 0.41465953
## 5 0.50976203 -0.003289474 0.1329659 1.4069893 0.07971528
## 6 -0.24083763 -0.003289474 0.1329659 -0.6514506 0.74262218
#printCoefmat(data.frame(localMI[fips,], row.names=data2$SUBZONE_N[fips]), check.names=FALSE)
data2.localMI <- cbind(data2,localMI)
A choropleth map of the local moran’s I value is plotted beside a choropleth map indicating the regions which have a significant p-value, i.e. a p-value which is less than 0.05.
tmap_mode("plot")
## tmap mode set to plotting
localMI.map <- tm_shape(data2.localMI) +
tm_polygons(col = "Ii",
style = "pretty",
title = "Local Moran's I value",
palette= "-RdBu") +
tm_layout(legend.outside = TRUE) +
tm_view(set.zoom.limits = c(10,16)) +
tm_borders(alpha = 0.5)
pvalue.map <- tm_shape(data2.localMI) +
tm_polygons(col = "Pr.z...0.",
breaks=c(-Inf,0.05, Inf),
palette=c("blue","lightgrey"),
title = "Local Moran's p-values") +
tm_view(set.zoom.limits = c(10,16)) +
tm_borders(alpha = 0.5)
tmap_arrange(localMI.map, pvalue.map, asp=1, nrow=1, sync = TRUE)
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
The dark blue regions on the map on the right shows the regions which have p-values less than 0.05 and is hence statistically significant to determine these regions to be either clusters or outliers. Clusters exhibit positive I values, which are shaded red in the map on the left. Outliers exhibit negative I values, which are shaded blue in the map on the left.
To understand characteristics of these regions, LISA cluster map is created!
nci <- moran.plot(data2$TAP_IN, rswm_knn8, labels=as.character(data2$SUBZONE_N), xlab="Residual Values", ylab="Spatially Lag residuals")
## Moran Scatterplot with standardised variable
data2$Z.TAP_IN <- scale(data2$TAP_IN) %>% as.vector
nci2 <- moran.plot(data2$Z.TAP_IN, rswm_knn8, labels=as.character(data2$SUBZONE_N), xlab="z-residual", ylab="Spatially Lag z-residual")
## Lisa map preperation
quadrant <- vector(mode="numeric",length=nrow(localMI))
TAP_IN.lag <- lag.listw(rswm_knn8, data2$TAP_IN)
data2$TAP_IN_LAG <- TAP_IN.lag
#DV <- data2$tapINres - mean(data2$tapINres)
#C_mI <- localMI[,1] - mean(localMI[,1])
DV <- data2$Z.TAP_IN
C_mI <- data2$TAP_IN_LAG
signif <- 0.05
quadrant[DV >0 & C_mI>0] <- 4
quadrant[DV <0 & C_mI<0] <- 1
quadrant[DV <0 & C_mI>0] <- 2
quadrant[DV >0 & C_mI<0] <- 3
quadrant[localMI[,5]>signif] <- 0
data2.localMI$quadrant <- quadrant
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(data2.localMI) +
tm_fill(col = "quadrant", style = "cat", palette = colors[c(sort(unique(quadrant)))+1], labels = clusters[c(sort(unique(quadrant)))+1], popup.vars = c("SUBZONE_N")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5)
As seen in the map above, there are various subzones in the “high-high” quadrant. High-High refers to a high tap-in value in the subzone as well as high tap in values in their neighbours. Groupings of these regions can be found majorly in the East part of Singapore (Geylang East, Kampong Ubi, Kembangan, Frankel, Bedok South, Bedok North, Bedok Reservoir, Tampines West, Simei, Tampines East, Pasir Drive, Pasir Ris Central ), the North part of Singapore (North Coast, Senoko North, Woodlands regional center, Woodlands East, Yishun West, Yishun Central, Yishun East, Yishun South ) and some locations in North-East and Western region of Singapore.
The Gi value is computed for each subzone through which the p-value is calculated for each subzone.
fips <- order(data2$TAP_IN)
gi.fixed <- localG(data2$TAP_IN, rswm_knn8)
gi.fixed
## [1] -0.692900774 0.172496682 -0.668196746 -0.294558240 0.536332011
## [6] -0.696162886 2.129695313 0.532449385 -1.181351913 0.694748729
## [11] 0.107348685 -1.531747799 3.232264605 2.722712611 3.735994412
## [16] 2.594988541 -0.887240437 0.394942062 -1.629446616 0.145190717
## [21] -0.121252791 -1.429223575 0.970492360 3.121439941 1.066412123
## [26] -1.459856689 0.892843837 -0.674421021 0.263402345 -1.180778867
## [31] -0.262078921 0.241689495 0.482820521 1.183358197 -0.644068095
## [36] -0.296996897 -1.528680977 -1.525968283 -1.202574122 -0.705526149
## [41] -0.025207695 2.183129496 -1.399743343 0.651968391 2.575523156
## [46] -1.380093101 -1.417732394 0.168704327 0.787679345 1.869132827
## [51] -0.840526796 -0.948072043 -1.404607237 -0.571890079 -0.270211281
## [56] 0.363309647 0.505555665 -1.496898947 -0.877334557 2.845893828
## [61] -1.028824493 -0.931242870 0.117839526 0.453786016 -0.390213249
## [66] -1.310186876 -0.774584595 -1.247533490 1.025531928 -1.067061602
## [71] -0.122104758 0.238303878 -1.130843106 -1.150449711 -0.507741643
## [76] 3.520703988 -1.106992818 3.176698908 -0.582679185 -0.406249345
## [81] 1.926280494 -0.878473937 0.364488540 -0.617988496 3.363767246
## [86] 0.169356267 -1.661863547 -1.643735001 -0.441271062 -1.334141426
## [91] -0.255490895 -1.092166186 -0.803533470 0.021318542 0.070898501
## [96] 1.237004210 0.738979610 1.037638786 -1.153141837 0.136473510
## [101] -1.053292027 -0.424941451 -0.995808166 -0.140325818 -0.093033083
## [106] -0.760001333 0.227250133 -0.318519665 0.925137434 -0.359474559
## [111] 0.798363208 -0.448951047 -0.945548445 -0.643936347 -0.336431147
## [116] 2.779277432 0.733786473 2.665771735 0.733241570 0.917605234
## [121] 4.233850465 -0.890573064 1.024216797 1.139159101 0.167066378
## [126] 2.397226016 1.828967530 1.069861902 -1.128886918 -1.088992999
## [131] -0.727684889 -0.880730712 -0.898070682 -1.048462130 -0.564019370
## [136] 0.768360223 0.547697849 0.828635452 0.850731008 0.019274897
## [141] 0.008103716 -1.462838413 0.901951648 -1.167440633 1.172921472
## [146] 1.856883465 1.752318317 -0.904386423 -0.715862166 -0.008855043
## [151] -1.649881826 1.010315661 -0.479683807 0.934323700 1.029944075
## [156] -1.262043098 -0.860171690 3.012686796 -0.404665030 -0.708757536
## [161] -0.084524774 1.453402912 -1.175743160 -0.501678651 -0.151742887
## [166] 0.225507819 -0.709281172 2.403663038 1.876955899 1.219520054
## [171] -0.892724162 -1.125659383 -0.911951474 -1.037582437 0.023937963
## [176] 0.073782694 -1.089280099 -0.736140572 2.747764933 2.319360584
## [181] 0.346560485 0.039216373 2.957887428 -0.855057533 -0.186671100
## [186] -1.439330704 0.128826223 -0.752229949 -1.066700196 0.395871073
## [191] -1.344948377 -1.027328469 -1.599518730 -1.838681063 -1.572931555
## [196] -0.923679777 -0.510232156 0.960153399 1.113352974 -0.753062198
## [201] -1.790872040 -0.237967235 -0.365829026 -1.092362001 1.204032147
## [206] -1.344543446 -0.661625567 1.491506783 -1.921599832 0.266824871
## [211] -1.055907756 0.786494166 -0.635899756 0.269773642 0.001075172
## [216] 0.702060882 0.860166739 -0.744635289 0.702039689 -0.284810207
## [221] 1.009054191 -0.978727501 0.979199360 -0.769548890 0.514445516
## [226] 0.465856184 2.571500972 -0.658771796 -0.072770467 1.193367159
## [231] 1.244437715 0.992805367 1.077400880 -1.590367947 1.050033551
## [236] 5.756743968 -0.235868005 0.477753081 -1.651360039 -0.781645731
## [241] -1.789586127 -0.728530134 0.911556513 -0.575906902 -0.674033239
## [246] -0.333865005 0.160812412 2.024036555 4.353318272 4.797062248
## [251] -0.922440338 -0.847584309 -1.203576371 -0.047675090 -0.855724331
## [256] 0.982722665 -0.102005190 -0.405576704 0.391657742 0.845271379
## [261] -1.440658428 -0.640719692 -0.468776634 -0.696080866 0.023594178
## [266] 1.184571733 0.693770169 -0.427800679 2.015648468 0.842967499
## [271] -1.837531496 -1.487705259 -1.514537581 -1.803709311 -1.752212766
## [276] 1.557431030 -0.825423689 -1.280570135 -0.331240998 -0.109162346
## [281] 0.583079597 -1.282618350 1.320646393 3.431667973 0.246756024
## [286] 2.702048880 1.232042500 1.431559276 0.919350410 1.767497392
## [291] 2.034615790 -1.033767491 5.684970762 0.329862564 0.437249562
## [296] 0.496210255 -0.153259856 0.048939378 1.332971654 1.824028289
## [301] 1.863602796 1.381284025 0.918718046 1.011728104 0.877530388
## attr(,"gstari")
## [1] FALSE
## attr(,"call")
## localG(x = data2$TAP_IN, listw = rswm_knn8)
## attr(,"class")
## [1] "localG"
data2.gi <- cbind(data2, as.matrix(gi.fixed))
names(data2.gi)[9] <- "gstat"
pvalue2sided<-2*pnorm(-abs(data2.gi$gstat))
data2.gi$pval <- pvalue2sided
a<-tm_shape(data2.gi) +
tm_fill(col = "gstat",
style="pretty",
palette="-RdBu",
title = "Gi values") +
tm_view(set.zoom.limits = c(10,17)) +
tm_borders(alpha = 0.5)
b<-tm_shape(data2.gi) +
tm_fill(col = "pval",
breaks=c(-Inf,0.05, Inf),
palette=c("blue","lightgrey"),
title = "p-values") +
tm_view(set.zoom.limits = c(10,17)) +
tm_borders(alpha = 0.5)
tmap_arrange(a, b, asp=1, nrow=1, sync = TRUE)
## Variable(s) "gstat" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
data2.gi <- data2.gi %>%
mutate("Type"= case_when(pval<0.05 & gstat<=0 ~ "Significant Cold Spot",
pval>=0.05 & gstat<=0 ~ "Insignificant Cold Spot",
pval<0.05 & gstat>0 ~ "Significant Hot Spot",
pval>=0.05 & gstat>0 ~ "Insignificant Hot Spot"))
tm_shape(data2.gi) +
tm_fill(col = "Type",
style = "pretty",
palette= c("lightblue","lightcoral","red"),
title = "Gi Analysis") +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha = 0.5)
In the choropleth map above, subzones shaded in red are hot spot areas and subzones shaded in blue are the cold spot areas. The subzones shaded in dark red are significant Hot Spots, however, there are no significant cold spots. Hot Spots are present in all regions other than Central Singapore (Eastern, Western and Northern), indicating excessive usage of bus transportation in residential area as compared to the business district. The cold spot areas, on the other hand, are mainly subzones in Central Singapore.
stats2 <- lm(TAP_OUT~value,data)
summary(stats2)
##
## Call:
## lm(formula = TAP_OUT ~ value, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -824702 -123573 -59179 34208 1643316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.259e+05 2.043e+04 6.163 2.27e-09 ***
## value 1.917e+01 9.088e-01 21.088 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 288800 on 303 degrees of freedom
## Multiple R-squared: 0.5948, Adjusted R-squared: 0.5934
## F-statistic: 444.7 on 1 and 303 DF, p-value: < 2.2e-16
stats2$coefficients
## (Intercept) value
## 125936.35239 19.16619
data$tapOUTres <- stats2$residuals
plot(data$value, data$TAP_OUT, main="Regression for Population on Tap Out volumes in Buses",xlab="Population in Subzone",ylab="Tap Out")
abline(reg = stats2,col='red')
From the regression model, we can derive the following:
(1) The linear equation is y = 19.17x + 125936
(2) The slope of the regression line is 19.17, which signifies a positive relationship between population and tap in volumes in buses
(3) The intercept is 125936
(4) The R-squared value is 0.5948, which signifies that 59.48% of the variation in tap in data is due to the variation in population.
plot(data$value, data$TAP_OUT, main="Regression for Population on Tap Out volumes in Buses",xlab="Population in Subzone",ylab="Tap Out")
abline(reg = stats2,col='red')
abline(reg = stats,col='blue')
The co-inciding lines suggest that both tap in and tap out follow a similar relationship with the population in the subzone!
Coming back to the analysis of Tap Out data, we will now find out if any spatial autocorrelation is present in the residual data of the regression model. ## Spatial Autocorrelaion
tmap_mode("plot")
## tmap mode set to plotting
data3 <- st_as_sf(data)
tm_shape(data3)+
tm_fill(col="tapOUTres",
style = "pretty",
palette = "-RdBu",
title = "Tap Out Residual data")+
tm_borders(alpha = 0.5)
## Variable(s) "tapOUTres" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tmap_mode("plot")
## tmap mode set to plotting
Negative residuals (Subzones in blue) imply over prediction of data whereas Positive residuals (Subzones in red) imply underprediction of data. Visually, spatial correlation for residuals is not evident but a more formal test needs to be conducted to confirm this. Therefore, global Moran’s test is performed on the residual data to validate if there is spatial correlation.
The null hypothesis is that the residuals obtained from the regression model of the relationship between population and bus tap out is randomised among different subzones in Singapore.
The alternative hypothesis is that the residuals obtained from the regression model of the relationship between population and bus tap out is not randomised among different subzones in Singapore.
The alpha (significance level) for this test is 0.05.
lm.morantest(stats2, listw=rswm_knn8, zero.policy=TRUE, alternative = "greater",
spChk=NULL, resfun=weighted.residuals, naSubset=TRUE)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = TAP_OUT ~ value, data = data)
## weights: rswm_knn8
##
## Moran I statistic standard deviate = 0.058411, p-value = 0.4767
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## -0.002589963 -0.004254316 0.000811893
The p-value is 0.478, which is above 0.05 and hence we do not reject the null hypothesis. However, to confirm our findings, we will run a Monte Carlo simulation 1000 times. ### Monte Carlo simulation for Moran’s I
set.seed(1234)
bperm= moran.mc(data3$tapOUTres, listw=rswm_knn8, nsim=999, zero.policy = TRUE, na.action=na.omit)
bperm
##
## Monte-Carlo simulation of Moran I
##
## data: data3$tapOUTres
## weights: rswm_knn8
## number of simulations + 1: 1000
##
## statistic = -0.00259, observed rank = 529, p-value = 0.471
## alternative hypothesis: greater
hist(bperm$res, freq=TRUE, breaks=20, xlab="Simulated Moran's I", main = "Findings from Monte Carlo Simulation")
abline(v=-0.002589963, col="red")
The Moran’s I in the Monte Carlo simulation is in the middle, validating our findings of an insignificant p-value from the Moran’s I test.
The p-value is 0.478, which is above 0.05 and hence we do not reject the null hypothesis. The moran’s I value is -0.002589963 which is very close to 0, implying that the residual data obtained from the population-tapin regression model conforms to spatial randomness among different subzones in Singapore. From the results of the test, we can conclude that the errors in the regression model are independent of one another, and their values do not depend on the value of residuals at neighbouring locations, i.e. there is no spatial correlation in the residuals.
MS <- moran.plot(data3$tapOUTres, rswm_knn8, zero.policy = TRUE, spChk=FALSE, labels=as.character(data2$SUBZONE_N), xlab="residual", ylab="Spatially Lag residual")
Spatial Correlograms are plotted in order to identify the autocorrelation of spatial observations when we increase the distance (lag) between them.
MI_corr <- sp.correlogram(wm_knn8, data2$tapINres, order=6, method="I", style="B")
plot(MI_corr, main = "Spatial Correlogram")
The findings from the above graph show that the Moran’s I value are all around 0 even when we increase the distance between them.
The null hypothesis is that the volume of Bus Tap Out is randomised among different subzones in Singapore.
The alternative hypothesis is that the volume of Bus Tap Out is not randomised among different subzones in Singapore.
The alpha (significance level) for this test is 0.05.
moran.test(data3$TAP_OUT, listw=rswm_knn8, zero.policy = TRUE, na.action=na.omit,alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: data3$TAP_OUT
## weights: rswm_knn8
##
## Moran I statistic standard deviate = 6.4916, p-value = 8.493e-11
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1783104209 -0.0032894737 0.0007825763
The p-value is 8.493e-11, which is below 0.05 and hence we reject the null hypothesis. However, to confirm our findings, we will run a Monte Carlo simulation 1000 times.
set.seed(1234)
bperm= moran.mc(data3$TAP_OUT, listw=rswm_knn8, nsim=999, zero.policy = TRUE, na.action=na.omit)
bperm
##
## Monte-Carlo simulation of Moran I
##
## data: data3$TAP_OUT
## weights: rswm_knn8
## number of simulations + 1: 1000
##
## statistic = 0.17831, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
hist(bperm$res, freq=TRUE, breaks=20, xlab="Simulated Moran's I", main="Findings from Monte Carlo Simulation")
abline(v=0.19, col="red")
The Moran’s I value is in the extreme right when compared with the Monte Carlo simulation i, validating our findings of an significant p-value from the Moran’s I test.
The p-value is 8.493e-11, which is below 0.05 and hence we reject the null hypothesis. Therefore, the volume of Bus Tap Out is not randomised among different subzones in Singapore, implying a spatial relationship between them. Furthermore, the Moran’s I value is 0.171 which is greather than 0, implying that the Tap In data is positively clustered among different subzones in Singapore.
From the code below, Local Moran’s I test is performed locally through which I value and p-value is obtained for every subzone.
fips <- order(data3$SUBZONE_N)
localMI <- localmoran(data3$TAP_OUT, rswm_knn8)
head(localMI)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 1 0.09232491 -0.003289474 0.1331512 0.2620298 0.3966492
## 2 -0.08047330 -0.003289474 0.1331512 -0.2115211 0.5837597
## 3 -0.10798507 -0.003289474 0.1331512 -0.2869167 0.6129119
## 4 0.09849363 -0.003289474 0.1331512 0.2789350 0.3901473
## 5 0.38825119 -0.003289474 0.1331512 1.0730112 0.1416330
## 6 -0.21862028 -0.003289474 0.1331512 -0.5901108 0.7224418
#printCoefmat(data.frame(localMI[fips,], row.names=data2$SUBZONE_N[fips]), check.names=FALSE)
data3.localMI <- cbind(data3,localMI)
A choropleth map of the Local Moran’s I value is plotted beside a choropleth map indicating the regions which have a significant p-value, i.e. a p-value which is less than 0.05.
tmap_mode("plot")
## tmap mode set to plotting
localMI.map <- tm_shape(data3.localMI) +
tm_fill(col = "Ii",
style = "pretty",
title = "Local Moran's I value",
palette= "-RdBu") +
tm_layout(legend.outside = TRUE) +
tm_view(set.zoom.limits = c(10,16)) +
tm_borders(alpha = 0.5)
pvalue.map <- tm_shape(data3.localMI) +
tm_fill(col = "Pr.z...0.",
breaks=c(-Inf,0.05, Inf),
palette=c("blue","lightgrey"),
title = "local Moran's I p-values") +
tm_layout(legend.outside = TRUE) +
tm_view(set.zoom.limits = c(10,16)) +
tm_borders(alpha = 0.5)
tmap_arrange(localMI.map, pvalue.map, asp=1, nrow=1, sync = TRUE)
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Some legend labels were too wide. These labels have been resized to 0.58. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
tmap_mode("plot")
## tmap mode set to plotting
The dark blue regions on the map on the right shows the regions which have p-values less than 0.05 and is hence statistically significant to determine these regions to be either clusters or outliers. Clusters exhibit positive I values, which are shaded red in the map on the left. Outliers exhibit negative I values, which are shaded blue in the map on the left.
To understand characteristics of these regions, LISA cluster map is created! # Creating LISA clusters map
nci <- moran.plot(data3$TAP_OUT, rswm_knn8, labels=as.character(data3$SUBZONE_N), xlab="Residual Values", ylab="Spatially Lag residuals")
data3$Z.TAP_OUT <- scale(data3$TAP_OUT) %>% as.vector
nci2 <- moran.plot(data3$Z.TAP_OUT, rswm_knn8, labels=as.character(data3$SUBZONE_N), xlab="z-residual", ylab="Spatially Lag z-residual")
quadrant <- vector(mode="numeric",length=nrow(localMI))
TAP_OUT.lag <- lag.listw(rswm_knn8, data3$TAP_OUT)
data3$TAP_OUT_LAG <- TAP_OUT.lag
#DV <- data2$tapINres - mean(data2$tapINres)
#C_mI <- localMI[,1] - mean(localMI[,1])
DV <- data3$Z.TAP_OUT
C_mI <- data3$TAP_OUT_LAG
signif <- 0.05
quadrant[DV >0 & C_mI>0] <- 4
quadrant[DV <0 & C_mI<0] <- 1
quadrant[DV <0 & C_mI>0] <- 2
quadrant[DV >0 & C_mI<0] <- 3
quadrant[localMI[,5]>signif] <- 0
data3.localMI$quadrant <- quadrant
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(data3.localMI) +
tm_fill(col = "quadrant", style = "cat", palette = colors[c(sort(unique(quadrant)))+1], labels = clusters[c(sort(unique(quadrant)))+1], popup.vars = c("SUBZONE_N")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5)
As seen in the map above, there are various subzones in the “high-high” quadrant. High-High refers to a high tap-out value in the subzone as well as high tap in values in their neighbours. Groupings of these regions can be found majorly in the East part of Singapore (Geylang East, Kampong Ubi, Kembangan, Frankel, Bedok South, Bedok North, Tampines West, Simei, Tampines East, Pasir Drive, Pasir Ris Central ), the North part of Singapore (North Coast, Senoko North, Woodlands regional center, Woodlands East, Yishun West, Yishun Central, Yishun East ) and some locations in North-East and Western region of Singapore.
We will now perform another local statistics to find out hot spots and cold spots.
The Gi value is computed for each subzone through which the p-value is calculated for each subzone.
fips <- order(data3$TAP_OUT)
gi.fixed <- localG(data3$TAP_OUT, rswm_knn8)
data3.gi <- cbind(data3, as.matrix(gi.fixed))
names(data3.gi)[9] <- "gstat"
pvalue2sided<-2*pnorm(-abs(data3.gi$gstat))
data3.gi$pval <- pvalue2sided
summary(data3.gi$gstat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.85784 -0.84453 -0.02267 0.15080 0.93063 5.88742
a<-tm_shape(data3.gi) +
tm_fill(col = "gstat",
style = "pretty",
palette="-RdBu",
title = "local Gi") +
tm_view(set.zoom.limits = c(10,17)) +
tm_borders(alpha = 0.5)
b<-tm_shape(data3.gi) +
tm_fill(col = "pval",
breaks=c(-Inf,0.05, Inf),
palette=c("blue1","grey90"),
title = "p-values") +
tm_view(set.zoom.limits = c(10,17)) +
tm_borders(alpha = 0.5)
tmap_arrange(a, b, asp=1, nrow=1, sync = TRUE)
## Variable(s) "gstat" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
These two maps can be combined to obtain values a single map which can be classified as follows:
(1) Insignificant Cold Spot [Negative Gi Value, p-value>=0.05]
(2) Significant Cold Spot [Negative Gi Value, p-value<0.05]
(3) Insignificant Hot Spot [Positive Gi Value, p-value>=0.05]
(4) Significant Hot Spot [Positive Gi Value, p-value<0.05]
data3.gi <- data3.gi %>%
mutate("Type"= case_when(pval<0.05 & gstat<=0 ~ "Significant Cold Spot",
pval>=0.05 & gstat<=0 ~ "Insignificant Cold Spot",
pval<0.05 & gstat>0 ~ "Significant Hot Spot",
pval>=0.05 & gstat>0 ~ "Insignificant Hot Spot"))
tm_shape(data3.gi) +
tm_fill(col = "Type",
style = "pretty",
palette= c("lightblue","lightcoral","red"),
title = "Gi Analysis") +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha = 0.5)
tmap_mode("plot")
## tmap mode set to plotting
In the choropleth map above, subzones shaded in red are hot spot areas and subzones shaded in blue are the cold spot areas. The subzones shaded in dark red are significant Hot Spots, however, there are no significant cold spots. Hot Spots are present in all regions other than Central Singapore (Eastern, Western and Northern), indicating excessive usage of bus transportation in residential area as compared to the business district. The cold spot areas, on the other hand, are mainly subzones in Central Singapore. This resembles to the findings from the Tap In Data.
Moran’s test was widely used in this assignment to determine the presence of spatial autocorrelation. Global Moran’s I test for Tap in and Tap out residual data indicated that there is autocorrelation among the residual data. This validates that there is no systematic error in the regression models, and the deviations are justified. By performing Global Moran’s I test on Tap in and Tap out data, a positive autocorrelation was found indicating that there is spatial relationship between the tap data and subzones. Two statistical tests in local analysis (Getis-Ord and Local Moran’s) indicated the presence of “high” clusters among the suburban regions of Singapore.