Since 2013, Singapore has an active Open Data initiative. It aims to enhance transparency, public participation, and collaboration in the nation. The nation has big data ambitions and believes that the bountiful pool of available data should be used to gain new insight that will improve economic welfare. With the inception of The Smart Nation vision in 2017, Open Data is seen as a necessary component of this initiative, especially in the promotion of public-private collaborations (co-innovation). However, most of the effort to date tend to focus on hosting online open data portal by various government agency such as data.gov.sg, SLA OneMap, URA SPACE and LTA LTA DataMall , just to mention a few of them. There are very little work on how to integrate information shared by these agencies to gain better understanding on national development or public services issues.
packages = c('rgdal', 'sf', 'spdep', 'tmap', 'tidyverse')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
## Loading required package: rgdal
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/vanes/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/vanes/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version: 1.4-1
## Loading required package: sf
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: tmap
## Loading required package: tidyverse
## -- Attaching packages ---- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##Importing data
#Importing subzone data
subzone = st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL")
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `C:\IS415\take home exercise\take home exercise 1\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
#Importing bus stops
busstop <- st_read(dsn = "data/geospatial",
layer = "BusStop")
## Reading layer `BusStop' from data source `C:\IS415\take home exercise\take home exercise 1\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 5040 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs
#Importing passenger(i.e tap in & tap out data)
passenger<- read_csv('data/aspatial/busstop.csv')
## Parsed with column specification:
## cols(
## YEAR_MONTH = col_character(),
## DAY_TYPE = col_character(),
## TIME_PER_HOUR = col_double(),
## PT_TYPE = col_character(),
## PT_CODE = col_character(),
## TOTAL_TAP_IN_VOLUME = col_double(),
## TOTAL_TAP_OUT_VOLUME = col_double()
## )
#Importing population data
population <- read_csv("data/aspatial/respopagsex2000to2018.csv")
## Parsed with column specification:
## cols(
## PA = col_character(),
## SZ = col_character(),
## AG = col_character(),
## Sex = col_character(),
## Pop = col_double(),
## Time = col_double()
## )
#subzone data
st_crs(subzone)
## Coordinate Reference System:
## No user input
## wkt:
## PROJCS["SVY21",
## GEOGCS["SVY21[WGS84]",
## DATUM["WGS_1984",
## SPHEROID["WGS_84",6378137.0,298.257223563]],
## PRIMEM["Greenwich",0.0],
## UNIT["Degree",0.0174532925199433],
## AUTHORITY["EPSG","4326"]],
## PROJECTION["Transverse_Mercator"],
## PARAMETER["False_Easting",28001.642],
## PARAMETER["False_Northing",38744.572],
## PARAMETER["Central_Meridian",103.8333333333333],
## PARAMETER["Scale_Factor",1.0],
## PARAMETER["Latitude_Of_Origin",1.366666666666667],
## UNIT["Meter",1.0]]
subzone <-st_transform(subzone, 3414)
#busstop data
st_crs(busstop)
## Coordinate Reference System:
## No user input
## wkt:
## PROJCS["SVY21",
## GEOGCS["GCS_WGS_1984",
## DATUM["WGS_1984",
## SPHEROID["WGS_84",6378137.0,298.257223563]],
## PRIMEM["Greenwich",0.0],
## UNIT["Degree",0.0174532925199433],
## AUTHORITY["EPSG","4326"]],
## PROJECTION["Transverse_Mercator"],
## PARAMETER["False_Easting",28001.642],
## PARAMETER["False_Northing",38744.572],
## PARAMETER["Central_Meridian",103.8333333333333],
## PARAMETER["Scale_Factor",1.0],
## PARAMETER["Latitude_Of_Origin",1.366666666666667],
## UNIT["Meter",1.0]]
busstop <- st_transform(busstop, 3414)
##Population data preparation
population_2018 <- population %>%
filter(Time == 2018) %>%
mutate(SUBZONE_N = toupper(SZ)) %>%
group_by(SUBZONE_N) %>%
summarize(SUBZONE_POPULATION = sum(Pop))
#Passenger Data Preparation
passengers <- passenger %>%
group_by(PT_CODE) %>%
summarize(total_tap_in = sum(TOTAL_TAP_IN_VOLUME), total_tap_out = sum(TOTAL_TAP_OUT_VOLUME))
#Joining population and subzone
subzone_population <- left_join(subzone, population_2018, by = "SUBZONE_N")
subzone_pop_busstop <- st_join(subzone_population, busstop)
combined <- left_join(subzone_pop_busstop, passengers,
by = c("BUS_STOP_N" = "PT_CODE"))
#Cleaning data by removing NA and duplicates
combined <- na.exclude(combined)
#Tap in data
tap_in_subzone <- combined %>%
group_by(SUBZONE_N) %>%
summarize(total_tap_in = sum(total_tap_in))
tap_in_subzone <- left_join(tap_in_subzone, population_2018,
by = c('SUBZONE_N' = 'SUBZONE_N'))
#Tap out data
tap_out_subzone <- combined %>%
group_by(SUBZONE_N) %>%
summarize(total_tap_out= sum(total_tap_out))
tap_out_subzone <- left_join(tap_out_subzone, population_2018,
by = c('SUBZONE_N' = 'SUBZONE_N'))
tm_shape(combined) +
tm_fill(col = "SUBZONE_POPULATION",
style = "pretty",
title = "Population Volume") +
tm_borders(alpha = 0.5)
## Warning: The shape combined is invalid. See sf::st_is_valid
tm_shape(tap_in_subzone) +
tm_fill(col = "total_tap_in",
style = "pretty",
title = "Tap In Volume") +
tm_borders(alpha = 0.5)
## Warning: The shape tap_in_subzone is invalid. See sf::st_is_valid
tm_shape(tap_out_subzone) +
tm_fill(col = "total_tap_out",
style = "pretty",
title = "Tap Out Volume") +
tm_borders(alpha = 0.5)
## Warning: The shape tap_out_subzone is invalid. See sf::st_is_valid
#Tap in
#linear model
tap_in <- tap_in_subzone$total_tap_in
population <- tap_in_subzone$SUBZONE_POPULATION
linearMod <- lm(tap_in ~ population)
summary(linearMod)
##
## Call:
## lm(formula = tap_in ~ population)
##
## Residuals:
## Min 1Q Median 3Q Max
## -806830 -121827 -68359 44944 1953208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.219e+05 2.106e+04 5.789 1.77e-08 ***
## population 1.902e+01 9.416e-01 20.205 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 298200 on 303 degrees of freedom
## Multiple R-squared: 0.574, Adjusted R-squared: 0.5726
## F-statistic: 408.3 on 1 and 303 DF, p-value: < 2.2e-16
linearMod$coefficients
## (Intercept) population
## 121896.90504 19.02443
#Tap In and population relationship using ggplot and scatter plot
ggplot(data=tap_in_subzone,aes(x=SUBZONE_POPULATION,y=total_tap_in)) + geom_point(size =1) + geom_smooth(method = "lm") + ggtitle("Tap in and Population")
## `geom_smooth()` using formula 'y ~ x'
scatter.smooth(x=tap_in_subzone$SUBZONE_POPULATION, y=tap_in_subzone$total_tap_in, main="Tap in ~ Population")
#correlation
cor(tap_in_subzone$SUBZONE_POPULATION, tap_in_subzone$total_tap_in)
## [1] 0.7576242
##Spatial Correlation
#Creating Queen Contiguity Based Neigbours
wm_q <- poly2nb(tap_in_subzone, queen=TRUE)
summary(wm_q)
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1844
## Percentage nonzero weights: 1.982263
## Average number of links: 6.045902
## Link number distribution:
##
## 2 3 4 5 6 7 8 9 10 11 12 14 17
## 6 11 27 79 76 50 36 13 2 2 1 1 1
## 6 least connected regions:
## 2 41 132 170 228 275 with 2 links
## 1 most connected region:
## 39 with 17 links
From this data , each subzone has 2 links and an average of 6 connections.The most connected area unit has 17 neighbours .
#Plotting Queen Contiguity Based Neighbours
centroids <- sf::st_centroid(tap_in_subzone$geometry)
plot(tap_in_subzone$geometry, border='lightgrey', main ="Queen contiguity based neighbours map for tap in")
plot(wm_q, st_coordinates(centroids), add=TRUE, col='blue')
#Creating (ROOK) contiguity based neighbours
wm_r <- poly2nb(tap_out_subzone, queen=FALSE)
summary(wm_r)
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1596
## Percentage nonzero weights: 1.715668
## Average number of links: 5.232787
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 13 14
## 1 5 24 70 91 62 29 15 4 2 1 1
## 1 least connected region:
## 170 with 1 link
## 1 most connected region:
## 39 with 14 links
The summary report above shows that there are 305 area units. The most connect area unit has 14 neighbours. There is 1 units with only one neighbour.
#the neighbors for the first polygon in the object
wm_q[[1]]
## [1] 215 216 218 219 220 262
Polygon 1 has 6 neighbors. The numbers represent the polygon IDs as stored in hunan SpatialPolygonsDataFrame class.
#name of Polygon ID=1
tap_in_subzone$SUBZONE_N[1]
## [1] "ADMIRALTY"
#the county names of the five neighboring polygons
tap_in_subzone$SUBZONE_N[c(2,3,4,57,85)]
## [1] "AIRPORT ROAD" "ALEXANDRA HILL" "ALEXANDRA NORTH" "CLEMENTI WOODS"
## [5] "GREENWOOD PARK"
#weight matrix
str(wm_q)
## List of 305
## $ : int [1:6] 215 216 218 219 220 262
## $ : int [1:2] 64 195
## $ : int [1:8] 4 36 65 122 148 157 200 202
## $ : int [1:6] 3 35 43 89 148 202
## $ : int [1:9] 23 80 81 111 112 116 143 162 254
## $ : int [1:6] 33 91 93 165 243 244
## $ : int [1:4] 60 75 155 221
## $ : int [1:8] 44 50 120 233 269 295 296 298
## $ : int [1:5] 38 47 52 70 253
## $ : int [1:6] 18 114 125 144 160 265
## $ : int [1:4] 39 63 72 102
## $ : int [1:5] 38 58 149 151 241
## $ : int [1:4] 16 78 235 293
## $ : int [1:8] 15 16 78 109 121 236 250 293
## $ : int [1:6] 14 109 121 185 236 250
## $ : int [1:5] 13 14 78 236 293
## $ : int [1:7] 28 66 74 133 211 242 282
## $ : int [1:8] 10 80 110 114 125 129 197 224
## $ : int [1:7] 87 88 103 134 209 234 276
## $ : int [1:5] 104 137 224 280 292
## $ : int [1:9] 27 50 135 137 154 230 266 269 292
## $ : int [1:6] 46 51 53 188 193 201
## $ : int [1:5] 5 80 110 111 112
## $ : int [1:7] 45 94 108 124 247 284 305
## $ : int [1:5] 27 125 135 189 265
## $ : int [1:8] 84 131 163 172 173 184 239 251
## $ : int [1:5] 21 25 135 265 266
## $ : int [1:5] 17 51 66 77 282
## $ : int [1:5] 31 34 86 95 260
## $ : int [1:5] 51 62 113 207 282
## $ : int [1:5] 29 32 33 34 86
## $ : int [1:4] 31 33 86 91
## $ : int [1:8] 6 31 32 34 91 243 267 268
## $ : int [1:7] 29 31 33 260 267 303 304
## $ : int [1:7] 4 43 89 188 206 263 264
## $ : int [1:7] 3 65 89 115 202 259 264
## $ : int [1:5] 38 47 156 201 253
## $ : int [1:9] 9 12 37 52 58 151 201 241 253
## $ : int [1:17] 11 63 72 79 90 145 147 161 165 166 ...
## $ : int [1:3] 41 42 293
## $ : int [1:2] 40 42
## $ : int [1:7] 40 41 76 140 141 248 293
## $ : int [1:9] 4 35 131 148 172 184 204 206 251
## $ : int [1:7] 8 50 214 230 232 269 296
## $ : int [1:6] 24 94 108 124 247 276
## $ : int [1:6] 22 47 53 188 193 201
## $ : int [1:10] 9 37 46 70 156 188 192 201 237 253
## $ : int [1:5] 49 119 190 256 286
## $ : int [1:6] 48 79 190 256 286 294
## $ : int [1:8] 8 21 44 137 154 230 232 269
## $ : int [1:11] 22 28 30 53 58 62 77 113 149 201 ...
## $ : int [1:8] 9 38 70 115 153 228 241 258
## $ : int [1:6] 22 46 51 77 188 206
## $ : int [1:5] 55 56 57 71 285
## $ : int [1:6] 54 57 71 238 243 268
## $ : int [1:6] 54 57 175 177 196 285
## $ : int [1:7] 54 55 56 67 164 177 238
## $ : int [1:5] 12 38 51 149 201
## $ : int [1:5] 92 130 148 157 252
## $ : int [1:7] 7 75 97 98 205 221 270
## $ : int [1:6] 73 90 93 130 244 278
## $ : int [1:8] 30 51 112 113 129 149 207 254
## $ : int [1:6] 11 39 83 91 102 165
## $ : int [1:9] 2 109 116 117 136 138 187 195 246
## $ : int [1:6] 3 36 122 200 257 259
## $ : int [1:7] 17 28 77 101 174 211 239
## $ : int [1:5] 57 82 164 171 238
## $ : int [1:5] 90 144 161 163 278
## $ : int [1:4] 150 152 162 235
## $ : int [1:6] 9 47 52 115 237 258
## $ : int [1:5] 54 55 100 268 285
## $ : int [1:5] 11 39 79 102 210
## $ : int [1:4] 61 130 204 278
## $ : int [1:6] 17 114 129 133 142 211
## $ : int [1:8] 7 60 155 212 213 214 222 270
## $ : int [1:5] 42 140 141 180 248
## $ : int [1:7] 28 51 53 66 99 174 206
## $ : int [1:8] 13 14 16 81 118 121 152 235
## $ : int [1:10] 39 49 72 147 176 210 223 256 277 294
## $ : int [1:7] 5 18 23 110 111 197 224
## $ : int [1:8] 5 78 109 116 118 121 143 162
## $ : int [1:5] 67 92 171 238 279
## $ : int [1:9] 63 86 91 95 102 119 223 256 260
## $ : int [1:6] 26 144 160 163 167 173
## $ : int [1:6] 158 168 227 288 289 291
## $ : int [1:6] 29 31 32 83 91 95
## $ : int [1:4] 19 88 194 234
## $ : int [1:7] 19 87 103 194 261 271 272
## $ : int [1:6] 4 35 36 115 202 264
## $ : int [1:6] 39 61 68 161 244 278
## $ : int [1:7] 6 32 33 63 83 86 165
## $ : int [1:6] 59 82 130 171 252 279
## $ : int [1:6] 6 61 130 243 244 279
## $ : int [1:7] 24 45 128 247 260 284 304
## $ : int [1:5] 29 83 86 119 260
## $ : int [1:5] 97 98 117 126 136
## $ : int [1:5] 60 96 98 117 205
## $ : int [1:7] 60 96 97 126 230 231 270
## $ : int [1:4] 77 131 174 206
## [list output truncated]
## - attr(*, "class")= chr "nb"
## - attr(*, "region.id")= chr [1:305] "1" "2" "3" "4" ...
## - attr(*, "call")= language poly2nb(pl = tap_in_subzone, queen = TRUE)
## - attr(*, "type")= chr "queen"
## - attr(*, "sym")= logi TRUE
#Creating (ROOK) contiguity based neighbours
wm_r <- poly2nb(tap_in_subzone, queen=FALSE)
summary(wm_r)
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1596
## Percentage nonzero weights: 1.715668
## Average number of links: 5.232787
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 13 14
## 1 5 24 70 91 62 29 15 4 2 1 1
## 1 least connected region:
## 170 with 1 link
## 1 most connected region:
## 39 with 14 links
The summary report above shows that there are 305 area units. The most connect area unit has 14 neighbours. There are two area units with only one heighbours.
#Plotting Rook contiguity based neighbours maps
centroids <- sf::st_centroid(tap_in_subzone$geometry)
plot(tap_in_subzone$geometry, border="lightgrey", main = "Rook contiguity based neighbours map for tap in")
plot(wm_r, st_coordinates(centroids), pch=19, cex=0.6, add=TRUE, col="green", main="Rook Contiguity")
#Computing distance based neighbours
coords <- st_coordinates(centroids)
k1 <- knn2nb(knearneigh(coords))
k1dists <- unlist(nbdists(k1, coords, longlat = TRUE))
## Warning in nbdists(k1, coords, longlat = TRUE): Coordinates are not
## geographical: longlat argument wrong
summary(k1dists)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 70.57 7331.07 10974.53 10535.70 13896.43 19611.70
#Computing fixed distance weight matrix
wm_d62 <- dnearneigh(coords, 0, 62, longlat = TRUE)
## Warning in dnearneigh(coords, 0, 62, longlat = TRUE): Coordinates are not
## geographical: longlat argument wrong
wm_d62
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 4
## Percentage nonzero weights: 0.004299919
## Average number of links: 0.01311475
## 301 regions with no links:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 34 35 36 37 38 39 40 41 42 43 44 45 46 47 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
str(wm_d62)
## List of 305
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 247
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 264
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## [list output truncated]
## - attr(*, "class")= chr "nb"
## - attr(*, "nbtype")= chr "distance"
## - attr(*, "distances")= num [1:2] 0 62
## - attr(*, "region.id")= chr [1:305] "1" "2" "3" "4" ...
## - attr(*, "call")= language dnearneigh(x = coords, d1 = 0, d2 = 62, longlat = TRUE)
## - attr(*, "dnn")= num [1:2] 0 62
## - attr(*, "bounds")= chr [1:2] "GT" "LE"
## - attr(*, "sym")= logi TRUE
#Computing adaptive distance weight matrix
wm_knn6 <- knn2nb(knearneigh(coords, k=6))
wm_knn6
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1830
## Percentage nonzero weights: 1.967213
## Average number of links: 6
## Non-symmetric neighbours list
#Row-standardised weights matrix
rswm_q2 <- nb2listw(wm_knn6, style="W", zero.policy = TRUE)
rswm_q2
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1830
## Percentage nonzero weights: 1.967213
## Average number of links: 6
## Non-symmetric neighbours list
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 305 93025 305 90.44444 1252.944
#Global Moran I
lm.morantest(linearMod,rswm_q2, zero.policy=NULL, alternative = "greater",
spChk=NULL, resfun=weighted.residuals, naSubset=TRUE)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = tap_in ~ population)
## weights: rswm_q2
##
## Moran I statistic standard deviate = -0.43946, p-value = 0.6698
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## -0.0177921151 -0.0043008630 0.0009424526
Since p-value=0.5844>0.5, we do not reject null hypothesis, relationship between tap in and population is random.
#Local Moran
tap_in_localMI <- localmoran(tap_in_subzone$total_tap_in, rswm_q2)
head(tap_in_localMI)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 1 0.103935256 -0.003289474 0.1552289 0.272150242 0.39275325
## 2 -0.112647432 -0.003289474 0.1552289 -0.277564653 0.60932672
## 3 -0.077801582 -0.003289474 0.1552289 -0.189121374 0.57500116
## 4 -0.003788135 -0.003289474 0.1552289 -0.001265668 0.50050493
## 5 0.675175003 -0.003289474 0.1552289 1.722030658 0.04253198
## 6 -0.249665773 -0.003289474 0.1552289 -0.625334937 0.73412437
#Binding to local moran I to dataframe
tap_in_subzone.localMI <- cbind(tap_in_subzone, tap_in_localMI)
tap_in_subzone.localMI
## Simple feature collection with 305 features and 8 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 21494.3 xmax: 50293.96 ymax: 50256.33
## CRS: EPSG:3414
## First 10 features:
## SUBZONE_N total_tap_in SUBZONE_POPULATION Ii
## 1 ADMIRALTY 178509 14180 0.103935256
## 2 AIRPORT ROAD 22692 0 -0.112647432
## 3 ALEXANDRA HILL 497549 14260 -0.077801582
## 4 ALEXANDRA NORTH 59359 1430 -0.003788135
## 5 ALJUNIED 1613882 40040 0.675175003
## 6 ANAK BUKIT 799579 22100 -0.249665773
## 7 ANCHORVALE 472053 43750 0.150099805
## 8 ANG MO KIO TOWN CENTRE 1100348 4830 0.543897129
## 9 ANSON 73701 0 0.271916723
## 10 BALESTIER 1061622 32360 0.575290280
## E.Ii Var.Ii Z.Ii Pr.z...0.
## 1 -0.003289474 0.1552289 0.272150242 0.39275325
## 2 -0.003289474 0.1552289 -0.277564653 0.60932672
## 3 -0.003289474 0.1552289 -0.189121374 0.57500116
## 4 -0.003289474 0.1552289 -0.001265668 0.50050493
## 5 -0.003289474 0.1552289 1.722030658 0.04253198
## 6 -0.003289474 0.1552289 -0.625334937 0.73412437
## 7 -0.003289474 0.1552289 0.389321844 0.34851904
## 8 -0.003289474 0.1552289 1.388830422 0.08244216
## 9 -0.003289474 0.1552289 0.698508948 0.24242948
## 10 -0.003289474 0.1552289 1.468510302 0.07098283
## geometry
## 1 MULTIPOLYGON (((27468.83 48...
## 2 MULTIPOLYGON (((35580.28 37...
## 3 MULTIPOLYGON (((25899.7 297...
## 4 MULTIPOLYGON (((26231.96 30...
## 5 MULTIPOLYGON (((34449.13 33...
## 6 MULTIPOLYGON (((21069.06 36...
## 7 MULTIPOLYGON (((34972.39 42...
## 8 MULTIPOLYGON (((29692.8 389...
## 9 MULTIPOLYGON (((29201.07 28...
## 10 MULTIPOLYGON (((31228.81 34...
#Plotting local moran I values
localMI.map <- tm_shape(tap_in_subzone.localMI) +
tm_fill(col = "Ii",
style = "equal",
title = "Local Moran I") +
tm_borders(alpha = 0.5)
pvalue.map <- tm_shape(tap_in_subzone.localMI) +
tm_fill(col = "Pr.z...0.",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
palette="-RdBu",
title = " p-values") +
tm_borders(alpha = 0.5)
tmap_arrange(localMI.map, pvalue.map, asp=1, ncol=2)
## Warning: The shape tap_in_subzone.localMI is invalid. See sf::st_is_valid
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Warning: The shape tap_in_subzone.localMI is invalid. See sf::st_is_valid
#Mapping Z scores in the local Moran’s I test output
tm_shape(tap_in_subzone.localMI) +
tm_fill(col = "Z.Ii",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
style = "pretty",
title = "range") +
tm_borders(alpha = 0.5) + tm_layout("Z scores in local Moran’s I", 0.8)
## Warning: The shape tap_in_subzone.localMI is invalid. See sf::st_is_valid
## Variable(s) "Z.Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Computing and visualising Monte Carlo Moran’s I
set.seed(1234)
bperm<- moran.mc(tap_in_subzone$total_tap_in, listw=rswm_q2, nsim=999, zero.policy = TRUE, na.action=na.omit)
bperm
##
## Monte-Carlo simulation of Moran I
##
## data: tap_in_subzone$total_tap_in
## weights: rswm_q2
## number of simulations + 1: 1000
##
## statistic = 0.17719, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
mean(bperm$res[1:999])
## [1] -0.005167232
var(bperm$res[1:999])
## [1] 0.0008481092
summary(bperm$res[1:999])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.096992 -0.026216 -0.006749 -0.005167 0.012854 0.088538
hist(bperm$res, freq=TRUE, breaks=20, xlab="Simulated Moran's I")
abline(v=0, col="red")
We can validate our findings of an insignificant p-value from the Moran’s I test as the Moran’s I in the Monte Carlo simulation is in the middle,
#Plotting Moran’s scatterplot
tap_in_nci <- moran.plot(tap_in_subzone$total_tap_in, rswm_q2, labels=as.character(tap_in_subzone$SUBZONE_N), xlab="Tap in Volume", ylab="Spatially Lag Tap In Volume")
#Compute Moran’s I correlogram and plot
MI_corr <- sp.correlogram(wm_q, tap_in_subzone$total_tap_in, order=6, method="I", style="B")
plot(MI_corr)
#Creating quadrants
quadrant <- vector(mode="numeric",length=nrow(tap_in_localMI))
tapin_DV <- tap_in_subzone$total_tap_in - mean(tap_in_subzone$total_tap_in)
tapin_C_mI <- tap_in_localMI[,1] - mean(tap_in_localMI[,1])
signif <- 0.05
quadrant[tapin_DV >0 & tapin_C_mI>0] <- 4
quadrant[tapin_DV <0 & tapin_C_mI<0] <- 1
quadrant[tapin_DV <0 & tapin_C_mI>0] <- 2
quadrant[tapin_DV >0 & tapin_C_mI<0] <- 3
quadrant[tap_in_localMI[,5]>signif] <- 0
tap_in_subzone.localMI$quadrant <- quadrant
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")
tm_shape(tap_in_subzone.localMI) +
tm_fill(col = "quadrant", style = "cat", palette = colors[c(sort(unique(quadrant)))+1], labels = clusters[c(sort(unique(quadrant)))+1], popup.vars = c("SUBZONE_N")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5)
## Warning: The shape tap_in_subzone.localMI is invalid. See sf::st_is_valid
From the LISA cluster map, the clusters that contribute the tap-in volume are areas in the East such as Tampines.This may imply that people are travelling out of those subzones for various activites such as work.
#Overall analysis for Tap In Data so far There is no spatial correlation in the residuals as the moran I value is very close to 0. This means that the residual data from the tap_in, population model follows the different subzone’s spatial randomness.
coords <- st_centroid(tap_in_subzone)
## Warning in st_centroid.sf(tap_in_subzone): st_centroid assumes attributes are
## constant over geometries of x
knb <- knn2nb(knearneigh(coords, k = 8, longlat = FALSE), row.names = row.names(tap_in_subzone$total_tap_in))
## Warning in knearneigh(coords, k = 8, longlat = FALSE): dnearneigh: longlat
## argument overrides object
tap_in_knb <- nb2listw(knb, style = 'B')
summary(tap_in_knb)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 2440
## Percentage nonzero weights: 2.622951
## Average number of links: 8
## Non-symmetric neighbours list
## Link number distribution:
##
## 8
## 305
## 305 least connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 with 8 links
## 305 most connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 with 8 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 305 93025 2440 4412 79922
#Plotting subzones and neighbours
plot(tap_in_subzone$geometry, border = "lightgrey")
plot(tap_in_knb, st_centroid(tap_in_subzone$geometry), pch = 19, cex = 0.6, add = TRUE, col = "red")
#Adaptive distance
fips <- order(tap_in_subzone$SUBZONE_N)
gi.adaptive <- localG(tap_in_subzone$total_tap_in, tap_in_knb)
tap_in_subzone.gi <- cbind(tap_in_subzone, as.matrix(gi.adaptive))
names(tap_in_subzone.gi)[5] <- "gstat_adaptive"
#linear model
tap_out <- tap_out_subzone$total_tap_out
population <- tap_out_subzone$SUBZONE_POPULATION
linearMod <- lm(tap_out ~ population)
summary(linearMod)
##
## Call:
## lm(formula = tap_out ~ population)
##
## Residuals:
## Min 1Q Median 3Q Max
## -786292 -121773 -59252 38217 1647325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.219e+05 2.042e+04 5.972 6.54e-09 ***
## population 1.892e+01 9.129e-01 20.728 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 289100 on 303 degrees of freedom
## Multiple R-squared: 0.5864, Adjusted R-squared: 0.5851
## F-statistic: 429.6 on 1 and 303 DF, p-value: < 2.2e-16
linearMod$coefficients
## (Intercept) population
## 121927.00573 18.92175
#Tap Out and population relationship using ggplot and scatter plot
ggplot(data=tap_out_subzone,aes(x=SUBZONE_POPULATION,y=total_tap_out)) + geom_point(size =1) + geom_smooth(method = "lm") + ggtitle("Tap Out and Population")
## `geom_smooth()` using formula 'y ~ x'
scatter.smooth(x=tap_out_subzone$SUBZONE_POPULATION, y=tap_out_subzone$total_tap_out, main="Tap Out ~ Population")
#correlation
cor(tap_out_subzone$SUBZONE_POPULATION, tap_out_subzone$total_tap_out)
## [1] 0.765783
#Analysis of Tap out data Analysis: since p< 2.2e-16, we reject null hypothesis and conclude that the model is significant. The linear equation is y=18.92x+121927. The R squared value is 0.5851 which means that 58.51% of variantion in the tap out data is because of the variation in population. The positive slope of regrssion line, 18.92 represents a positive relationship between tap out and population.
##Spatial Correlation
#Creating Queen Contiguity Based Neigbours
wm_q <- poly2nb(tap_out_subzone, queen=TRUE)
summary(wm_q)
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1844
## Percentage nonzero weights: 1.982263
## Average number of links: 6.045902
## Link number distribution:
##
## 2 3 4 5 6 7 8 9 10 11 12 14 17
## 6 11 27 79 76 50 36 13 2 2 1 1 1
## 6 least connected regions:
## 2 41 132 170 228 275 with 2 links
## 1 most connected region:
## 39 with 17 links
From this data , each subzone has 2 links and an average of 6 connections.The most connected area unit has 17 neighbours .
#Plotting Queen Contiguity Based Neighbours
centroids2 <- sf::st_centroid(tap_out_subzone$geometry)
plot(tap_out_subzone$geometry, border='lightgrey', main ="Queen contiguity based neighbours map for tap out")
plot(wm_q, st_coordinates(centroids2), add=TRUE, col='blue')
#Creating (ROOK) contiguity based neighbours
wm_r2 <- poly2nb(tap_out_subzone, queen=FALSE)
summary(wm_r2)
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1596
## Percentage nonzero weights: 1.715668
## Average number of links: 5.232787
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 13 14
## 1 5 24 70 91 62 29 15 4 2 1 1
## 1 least connected region:
## 170 with 1 link
## 1 most connected region:
## 39 with 14 links
The summary report above shows that there are 305 area units. The most connect area unit has 14 neighbours. There is 1 units with only one neighbour.
#the neighbors for the first polygon in the object
wm_r2[[1]]
## [1] 215 216 218 219 220 262
Polygon 1 has 6 neighbors. The numbers represent the polygon IDs as stored in hunan SpatialPolygonsDataFrame class.
#name of Polygon ID=1
tap_out_subzone$SUBZONE_N[1]
## [1] "ADMIRALTY"
#the county names of the five neighboring polygons
tap_out_subzone$SUBZONE_N[c(2,3,4,57,85)]
## [1] "AIRPORT ROAD" "ALEXANDRA HILL" "ALEXANDRA NORTH" "CLEMENTI WOODS"
## [5] "GREENWOOD PARK"
#weight matrix
str(wm_q)
## List of 305
## $ : int [1:6] 215 216 218 219 220 262
## $ : int [1:2] 64 195
## $ : int [1:8] 4 36 65 122 148 157 200 202
## $ : int [1:6] 3 35 43 89 148 202
## $ : int [1:9] 23 80 81 111 112 116 143 162 254
## $ : int [1:6] 33 91 93 165 243 244
## $ : int [1:4] 60 75 155 221
## $ : int [1:8] 44 50 120 233 269 295 296 298
## $ : int [1:5] 38 47 52 70 253
## $ : int [1:6] 18 114 125 144 160 265
## $ : int [1:4] 39 63 72 102
## $ : int [1:5] 38 58 149 151 241
## $ : int [1:4] 16 78 235 293
## $ : int [1:8] 15 16 78 109 121 236 250 293
## $ : int [1:6] 14 109 121 185 236 250
## $ : int [1:5] 13 14 78 236 293
## $ : int [1:7] 28 66 74 133 211 242 282
## $ : int [1:8] 10 80 110 114 125 129 197 224
## $ : int [1:7] 87 88 103 134 209 234 276
## $ : int [1:5] 104 137 224 280 292
## $ : int [1:9] 27 50 135 137 154 230 266 269 292
## $ : int [1:6] 46 51 53 188 193 201
## $ : int [1:5] 5 80 110 111 112
## $ : int [1:7] 45 94 108 124 247 284 305
## $ : int [1:5] 27 125 135 189 265
## $ : int [1:8] 84 131 163 172 173 184 239 251
## $ : int [1:5] 21 25 135 265 266
## $ : int [1:5] 17 51 66 77 282
## $ : int [1:5] 31 34 86 95 260
## $ : int [1:5] 51 62 113 207 282
## $ : int [1:5] 29 32 33 34 86
## $ : int [1:4] 31 33 86 91
## $ : int [1:8] 6 31 32 34 91 243 267 268
## $ : int [1:7] 29 31 33 260 267 303 304
## $ : int [1:7] 4 43 89 188 206 263 264
## $ : int [1:7] 3 65 89 115 202 259 264
## $ : int [1:5] 38 47 156 201 253
## $ : int [1:9] 9 12 37 52 58 151 201 241 253
## $ : int [1:17] 11 63 72 79 90 145 147 161 165 166 ...
## $ : int [1:3] 41 42 293
## $ : int [1:2] 40 42
## $ : int [1:7] 40 41 76 140 141 248 293
## $ : int [1:9] 4 35 131 148 172 184 204 206 251
## $ : int [1:7] 8 50 214 230 232 269 296
## $ : int [1:6] 24 94 108 124 247 276
## $ : int [1:6] 22 47 53 188 193 201
## $ : int [1:10] 9 37 46 70 156 188 192 201 237 253
## $ : int [1:5] 49 119 190 256 286
## $ : int [1:6] 48 79 190 256 286 294
## $ : int [1:8] 8 21 44 137 154 230 232 269
## $ : int [1:11] 22 28 30 53 58 62 77 113 149 201 ...
## $ : int [1:8] 9 38 70 115 153 228 241 258
## $ : int [1:6] 22 46 51 77 188 206
## $ : int [1:5] 55 56 57 71 285
## $ : int [1:6] 54 57 71 238 243 268
## $ : int [1:6] 54 57 175 177 196 285
## $ : int [1:7] 54 55 56 67 164 177 238
## $ : int [1:5] 12 38 51 149 201
## $ : int [1:5] 92 130 148 157 252
## $ : int [1:7] 7 75 97 98 205 221 270
## $ : int [1:6] 73 90 93 130 244 278
## $ : int [1:8] 30 51 112 113 129 149 207 254
## $ : int [1:6] 11 39 83 91 102 165
## $ : int [1:9] 2 109 116 117 136 138 187 195 246
## $ : int [1:6] 3 36 122 200 257 259
## $ : int [1:7] 17 28 77 101 174 211 239
## $ : int [1:5] 57 82 164 171 238
## $ : int [1:5] 90 144 161 163 278
## $ : int [1:4] 150 152 162 235
## $ : int [1:6] 9 47 52 115 237 258
## $ : int [1:5] 54 55 100 268 285
## $ : int [1:5] 11 39 79 102 210
## $ : int [1:4] 61 130 204 278
## $ : int [1:6] 17 114 129 133 142 211
## $ : int [1:8] 7 60 155 212 213 214 222 270
## $ : int [1:5] 42 140 141 180 248
## $ : int [1:7] 28 51 53 66 99 174 206
## $ : int [1:8] 13 14 16 81 118 121 152 235
## $ : int [1:10] 39 49 72 147 176 210 223 256 277 294
## $ : int [1:7] 5 18 23 110 111 197 224
## $ : int [1:8] 5 78 109 116 118 121 143 162
## $ : int [1:5] 67 92 171 238 279
## $ : int [1:9] 63 86 91 95 102 119 223 256 260
## $ : int [1:6] 26 144 160 163 167 173
## $ : int [1:6] 158 168 227 288 289 291
## $ : int [1:6] 29 31 32 83 91 95
## $ : int [1:4] 19 88 194 234
## $ : int [1:7] 19 87 103 194 261 271 272
## $ : int [1:6] 4 35 36 115 202 264
## $ : int [1:6] 39 61 68 161 244 278
## $ : int [1:7] 6 32 33 63 83 86 165
## $ : int [1:6] 59 82 130 171 252 279
## $ : int [1:6] 6 61 130 243 244 279
## $ : int [1:7] 24 45 128 247 260 284 304
## $ : int [1:5] 29 83 86 119 260
## $ : int [1:5] 97 98 117 126 136
## $ : int [1:5] 60 96 98 117 205
## $ : int [1:7] 60 96 97 126 230 231 270
## $ : int [1:4] 77 131 174 206
## [list output truncated]
## - attr(*, "class")= chr "nb"
## - attr(*, "region.id")= chr [1:305] "1" "2" "3" "4" ...
## - attr(*, "call")= language poly2nb(pl = tap_out_subzone, queen = TRUE)
## - attr(*, "type")= chr "queen"
## - attr(*, "sym")= logi TRUE
#Creating (ROOK) contiguity based neighbours
wm_r <- poly2nb(tap_out_subzone, queen=FALSE)
summary(wm_r)
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1596
## Percentage nonzero weights: 1.715668
## Average number of links: 5.232787
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 13 14
## 1 5 24 70 91 62 29 15 4 2 1 1
## 1 least connected region:
## 170 with 1 link
## 1 most connected region:
## 39 with 14 links
The summary report above shows that there are 305 area units. The most connect area unit has 14 neighbours. There are two area units with only one heighbours.
#Plotting Rook contiguity based neighbours maps
centroids2 <- sf::st_centroid(tap_out_subzone$geometry)
plot(tap_out_subzone$geometry, border="lightgrey", main = "Rook contiguity based neighbours map for tap out")
plot(wm_r, st_coordinates(centroids2), pch=19, cex=0.6, add=TRUE, col="green", main="Rook Contiguity")
#Computing distance based neighbours
coords2 <- st_coordinates(centroids2)
k12 <- knn2nb(knearneigh(coords))
k1dists2 <- unlist(nbdists(k12, coords2, longlat = TRUE))
## Warning in nbdists(k12, coords2, longlat = TRUE): Coordinates are not
## geographical: longlat argument wrong
summary(k1dists2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 70.57 7331.07 10974.53 10535.70 13896.43 19611.70
#Computing fixed distance weight matrix
wm_d622 <- dnearneigh(coords, 0, 62, longlat = TRUE)
## Warning in dnearneigh(coords, 0, 62, longlat = TRUE): dnearneigh: longlat
## argument overrides object
wm_d622
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 0
## Percentage nonzero weights: 0
## Average number of links: 0
## 305 regions with no links:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
str(wm_d622)
## List of 305
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## $ : int 0
## [list output truncated]
## - attr(*, "class")= chr "nb"
## - attr(*, "nbtype")= chr "distance"
## - attr(*, "distances")= num [1:2] 0 62
## - attr(*, "region.id")= chr [1:305] "1" "2" "3" "4" ...
## - attr(*, "call")= language dnearneigh(x = coords, d1 = 0, d2 = 62, longlat = TRUE)
## - attr(*, "dnn")= num [1:2] 0 62
## - attr(*, "bounds")= chr [1:2] "GT" "LE"
## - attr(*, "sym")= logi TRUE
#Computing adaptive distance weight matrix
wm_knn62 <- knn2nb(knearneigh(coords2, k=6))
wm_knn62
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1830
## Percentage nonzero weights: 1.967213
## Average number of links: 6
## Non-symmetric neighbours list
#Row-standardised weights matrix
rswm_q22 <- nb2listw(wm_knn62, style="W", zero.policy = TRUE)
rswm_q22
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 1830
## Percentage nonzero weights: 1.967213
## Average number of links: 6
## Non-symmetric neighbours list
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 305 93025 305 90.44444 1252.944
#Global Moran I
lm.morantest(linearMod,rswm_q22, zero.policy=NULL, alternative = "greater",
spChk=NULL, resfun=weighted.residuals, naSubset=TRUE)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = tap_out ~ population)
## weights: rswm_q22
##
## Moran I statistic standard deviate = -0.21325, p-value = 0.5844
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## -0.0108475978 -0.0043008630 0.0009424526
Since p-value=0.5844>0.5, we do not reject null hypothesis, relationship between tap out and population is random.
#Local Moran
tap_out_localMI <- localmoran(tap_out_subzone$total_tap_out, rswm_q2)
head(tap_out_localMI)
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 1 0.07976977 -0.003289474 0.1554453 0.21066827 0.41657307
## 2 -0.12465117 -0.003289474 0.1554453 -0.30781715 0.62088926
## 3 -0.14003199 -0.003289474 0.1554453 -0.34682847 0.63563991
## 4 0.01428841 -0.003289474 0.1554453 0.04458388 0.48221950
## 5 0.53236443 -0.003289474 0.1554453 1.35861201 0.08713478
## 6 -0.23235926 -0.003289474 0.1554453 -0.58100381 0.71938106
#Binding to local moran I to dataframe
tap_out_subzone.localMI <- cbind(tap_out_subzone, tap_out_localMI)
tap_out_subzone.localMI
## Simple feature collection with 305 features and 8 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 21494.3 xmax: 50293.96 ymax: 50256.33
## CRS: EPSG:3414
## First 10 features:
## SUBZONE_N total_tap_out SUBZONE_POPULATION Ii
## 1 ADMIRALTY 228889 14180 0.07976977
## 2 AIRPORT ROAD 24524 0 -0.12465117
## 3 ALEXANDRA HILL 603813 14260 -0.14003199
## 4 ALEXANDRA NORTH 41292 1430 0.01428841
## 5 ALJUNIED 1704099 40040 0.53236443
## 6 ANAK BUKIT 745657 22100 -0.23235926
## 7 ANCHORVALE 474260 43750 0.15610372
## 8 ANG MO KIO TOWN CENTRE 1052820 4830 0.46123303
## 9 ANSON 59522 0 0.30313432
## 10 BALESTIER 1065839 32360 0.57823063
## E.Ii Var.Ii Z.Ii Pr.z...0. geometry
## 1 -0.003289474 0.1554453 0.21066827 0.41657307 MULTIPOLYGON (((27468.83 48...
## 2 -0.003289474 0.1554453 -0.30781715 0.62088926 MULTIPOLYGON (((35580.28 37...
## 3 -0.003289474 0.1554453 -0.34682847 0.63563991 MULTIPOLYGON (((25899.7 297...
## 4 -0.003289474 0.1554453 0.04458388 0.48221950 MULTIPOLYGON (((26231.96 30...
## 5 -0.003289474 0.1554453 1.35861201 0.08713478 MULTIPOLYGON (((34449.13 33...
## 6 -0.003289474 0.1554453 -0.58100381 0.71938106 MULTIPOLYGON (((21069.06 36...
## 7 -0.003289474 0.1554453 0.40427877 0.34300387 MULTIPOLYGON (((34972.39 42...
## 8 -0.003289474 0.1554453 1.17819705 0.11935903 MULTIPOLYGON (((29692.8 389...
## 9 -0.003289474 0.1554453 0.77720155 0.21851993 MULTIPOLYGON (((29201.07 28...
## 10 -0.003289474 0.1554453 1.47494526 0.07011363 MULTIPOLYGON (((31228.81 34...
#Plotting local moran I values
localMI.map <- tm_shape(tap_out_subzone.localMI) +
tm_fill(col = "Ii",
style = "equal",
title = "Local Moran I") +
tm_borders(alpha = 0.5)
pvalue.map <- tm_shape(tap_out_subzone.localMI) +
tm_fill(col = "Pr.z...0.",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
palette="-RdBu",
title = " p-values") +
tm_borders(alpha = 0.5)
tmap_arrange(localMI.map, pvalue.map, asp=1, ncol=2)
## Warning: The shape tap_out_subzone.localMI is invalid. See sf::st_is_valid
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Warning: The shape tap_out_subzone.localMI is invalid. See sf::st_is_valid
#Mapping Z scores in the local Moran’s I test output
tm_shape(tap_out_subzone.localMI) +
tm_fill(col = "Z.Ii",
breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
style = "pretty",
title = "range") +
tm_borders(alpha = 0.5) + tm_layout("Z scores in local Moran’s I", 0.8)
## Warning: The shape tap_out_subzone.localMI is invalid. See sf::st_is_valid
## Variable(s) "Z.Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Computing and visualising Monte Carlo Moran’s I
set.seed(1234)
bperm<- moran.mc(tap_out_subzone$total_tap_out, listw=rswm_q2, nsim=999, zero.policy = TRUE, na.action=na.omit)
bperm
##
## Monte-Carlo simulation of Moran I
##
## data: tap_out_subzone$total_tap_out
## weights: rswm_q2
## number of simulations + 1: 1000
##
## statistic = 0.18213, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
mean(bperm$res[1:999])
## [1] -0.004971756
var(bperm$res[1:999])
## [1] 0.000847986
summary(bperm$res[1:999])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.095220 -0.025899 -0.005891 -0.004972 0.012822 0.092149
hist(bperm$res, freq=TRUE, breaks=20, xlab="Simulated Moran's I")
abline(v=0, col="red")
We can validate our findings of an insignificant p-value from the Moran’s I test as the Moran’s I in the Monte Carlo simulation is in the middle,
#Plotting Moran’s scatterplot
tap_out_nci <- moran.plot(tap_out_subzone$total_tap_out, rswm_q2, labels=as.character(tap_out_subzone$SUBZONE_N), xlab="Tap out Volume", ylab="Spatially Lag Tap out Volume")
#Compute Moran’s I correlogram and plot
MI_corr <- sp.correlogram(wm_q, tap_out_subzone$total_tap_out, order=6, method="I", style="B")
plot(MI_corr)
#Creating quadrants
quadrant <- vector(mode="numeric",length=nrow(tap_out_localMI))
tapout_DV <- tap_out_subzone$total_tap_out - mean(tap_out_subzone$total_tap_out)
tapout_C_mI <- tap_out_localMI[,1] - mean(tap_out_localMI[,1])
signif <- 0.05
quadrant[tapout_DV >0 & tapout_C_mI>0] <- 4
quadrant[tapout_DV <0 & tapout_C_mI<0] <- 1
quadrant[tapout_DV <0 & tapout_C_mI>0] <- 2
quadrant[tapout_DV >0 & tapout_C_mI<0] <- 3
quadrant[tap_out_localMI[,5]>signif] <- 0
tap_out_subzone.localMI$quadrant <- quadrant
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")
tm_shape(tap_out_subzone.localMI) +
tm_fill(col = "quadrant", style = "cat", palette = colors[c(sort(unique(quadrant)))+1], labels = clusters[c(sort(unique(quadrant)))+1], popup.vars = c("SUBZONE_N")) +
tm_view(set.zoom.limits = c(11,17)) +
tm_borders(alpha=0.5)
## Warning: The shape tap_out_subzone.localMI is invalid. See sf::st_is_valid
From the LISA cluster map, the clusters that contribute the tap-out volume are areas in the East such as Tampines.This may imply that people are travelling out of those subzones for various activites such as work.
#Overall analysis for Tap out Data so far There is no spatial correlation in the residuals as the moran I value is very close to 0. This means that the residual data from the tap_out, population model follows the different subzone’s spatial randomness.
coords <- st_centroid(tap_out_subzone)
## Warning in st_centroid.sf(tap_out_subzone): st_centroid assumes attributes are
## constant over geometries of x
knb <- knn2nb(knearneigh(coords, k = 8, longlat = FALSE), row.names = row.names(tap_out_subzone$total_tap_out))
## Warning in knearneigh(coords, k = 8, longlat = FALSE): dnearneigh: longlat
## argument overrides object
tap_out_knb <- nb2listw(knb, style = 'B')
summary(tap_out_knb)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 305
## Number of nonzero links: 2440
## Percentage nonzero weights: 2.622951
## Average number of links: 8
## Non-symmetric neighbours list
## Link number distribution:
##
## 8
## 305
## 305 least connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 with 8 links
## 305 most connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 with 8 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 305 93025 2440 4412 79922
#Plotting subzones and neighbours
plot(tap_out_subzone$geometry, border = "lightgrey")
plot(tap_out_knb, st_centroid(tap_out_subzone$geometry), pch = 19, cex = 0.6, add = TRUE, col = "red")
#Adaptive distance
fips <- order(tap_out_subzone$SUBZONE_N)
gi.adaptive <- localG(tap_out_subzone$total_tap_out, tap_out_knb)
tap_out_subzone.gi <- cbind(tap_out_subzone, as.matrix(gi.adaptive))
names(tap_out_subzone.gi)[5] <- "gstat_adaptive"