packages = c('rgdal', 'spdep', 'maptools','seriation', 'dendextend', 'heatmaply', 'tidyverse','sf', 'tmap', 'tidyverse', 'GGally', 'plotly', 'parcoords')
for(p in packages){library
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
## Loading required package: rgdal
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
## Path to GDAL shared files: /Users/Jufri/Library/R/3.6/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
## Path to PROJ.4 shared files: /Users/Jufri/Library/R/3.6/library/rgdal/proj
## Linking to sp version: 1.3-2
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
## Loading required package: maptools
## Checking rgeos availability: TRUE
## Loading required package: seriation
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust gclus
## Loading required package: dendextend
##
## ---------------------
## Welcome to dendextend version 1.13.4
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
## Loading required package: heatmaply
## Loading required package: plotly
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Loading required package: viridis
## Loading required package: viridisLite
##
## ======================
## Welcome to heatmaply version 1.1.0
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## Or contact: <tal.galili@gmail.com>
## ======================
## Loading required package: tidyverse
## ── Attaching packages ────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.3
## ── Conflicts ───────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: tmap
## Loading required package: GGally
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
## Loading required package: parcoords
#require(devtools)
#install_version("ggplot2", version = "3.2.1", repos = "http://cran.us.r-project.org")
library(ggplot2)
#https://geodacenter.github.io/tutorials/spatial_cluster/skater.html
#https://cran.r-project.org/web/packages/spdep/spdep.pdf
#mpsz <- st_read(dsn = "data/London-wards-2018_ESRI/",
# layer = "London_Ward")
#mpsz <- readOGR(dsn = "data/London-wards-2018_ESRI/",
# layer = "London_Ward")
mpsz <- st_read("data/wardmap.shp")
## Reading layer `wardmap' from data source `/Users/Jufri/OneDrive - Singapore Management University/ISSS608 - Visual Analytics and Applications/DataViz/Week09/Week09/data/wardmap.shp' using driver `ESRI Shapefile'
## Simple feature collection with 638 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## epsg (SRID): 27700
## proj4string: +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs
mpsz
## Simple feature collection with 638 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## epsg (SRID): 27700
## proj4string: +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs
## First 10 features:
## area_id ward long_x lat_x geometry
## 1 E05000026 Abbey 0.081277 51.53983 MULTIPOLYGON (((544395.7 18...
## 2 E05000027 Alibon 0.150987 51.54592 MULTIPOLYGON (((549604.1 18...
## 3 E05000028 Becontree 0.116912 51.55260 MULTIPOLYGON (((547563.4 18...
## 4 E05000029 Chadwell Heath 0.138596 51.58342 MULTIPOLYGON (((548881 1910...
## 5 E05000030 Eastbrook 0.173453 51.55519 MULTIPOLYGON (((551552.9 18...
## 6 E05000031 Eastbury 0.104756 51.53581 MULTIPOLYGON (((547271.2 18...
## 7 E05000032 Gascoigne 0.085689 51.53257 MULTIPOLYGON (((543996.8 18...
## 8 E05000033 Goresbrook 0.132641 51.53604 MULTIPOLYGON (((549072.8 18...
## 9 E05000034 Heath 0.152931 51.55869 MULTIPOLYGON (((550392.7 18...
## 10 E05000035 Longbridge 0.093644 51.54443 MULTIPOLYGON (((546185.3 18...
summary(mpsz)
## area_id ward long_x lat_x
## E05000026: 1 Abbey : 2 Min. :-0.48424 Min. :51.30
## E05000027: 1 Alexandra: 2 1st Qu.:-0.22076 1st Qu.:51.45
## E05000028: 1 Barnhill : 2 Median :-0.11886 Median :51.51
## E05000029: 1 Belmont : 2 Mean :-0.12332 Mean :51.50
## E05000030: 1 Edgware : 2 3rd Qu.:-0.02146 3rd Qu.:51.56
## E05000031: 1 Fairfield: 2 Max. : 0.27621 Max. :51.68
## (Other) :632 (Other) :626
## geometry
## MULTIPOLYGON :638
## epsg:27700 : 0
## +proj=tmer...: 0
##
##
##
##
plot(st_geometry(mpsz),border=gray(0.5))
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(mpsz)+
tm_bubbles(col = "red",
size = 0.2,
border.col = "black",
border.lwd = 1)
tesco <- read_csv("data/year_osward_grocery.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## area_id = col_character()
## )
## See spec(...) for full column specifications.
#row.names(tesco) <- tesco$area_id
row.names(tesco) <- tesco$area_id
## Warning: Setting row names on a tibble is deprecated.
tesco_measures <- select(tesco, c(1,18,26,34,42,50,58,66,74))
#names(tesco_measures)[names(tesco_measures) == "area_id"] <- "LAGSSCODE"
mapward <- right_join(mpsz, tesco_measures,
by = c("area_id" = "area_id"))
## Warning: Column `area_id` joining factor and character vector, coercing into
## character vector
vars <- c(5,6,7,8,9,10,11,12)
#dat <- select(mapward, vars)
#dat <- data.frame(mpsz_1@data[,vars])
dat <- data.frame(scale(as.data.frame(mapward)[,vars]))
summary(dat)
## fat saturate salt sugar
## Min. :-4.01619 Min. :-3.5942 Min. :-2.7569 Min. :-2.7651
## 1st Qu.:-0.59329 1st Qu.:-0.5683 1st Qu.:-0.6997 1st Qu.:-0.6828
## Median :-0.09267 Median :-0.1003 Median : 0.0193 Median :-0.1761
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.48963 3rd Qu.: 0.4333 3rd Qu.: 0.6720 3rd Qu.: 0.7087
## Max. : 8.18252 Max. : 8.6391 Max. : 4.3510 Max. : 8.0120
## protein carb fibre alcohol
## Min. :-3.8440 Min. :-3.34969 Min. :-2.3196 Min. :-2.55428
## 1st Qu.:-0.4969 1st Qu.:-0.70638 1st Qu.:-0.7074 1st Qu.:-0.66014
## Median : 0.1434 Median : 0.02254 Median :-0.1407 Median :-0.09026
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.6857 3rd Qu.: 0.71047 3rd Qu.: 0.5421 3rd Qu.: 0.50876
## Max. : 3.0311 Max. : 3.66477 Max. : 4.1792 Max. : 4.98947
sdat <- scale(dat)
summary(sdat)
## fat saturate salt sugar
## Min. :-4.01619 Min. :-3.5942 Min. :-2.7569 Min. :-2.7651
## 1st Qu.:-0.59329 1st Qu.:-0.5683 1st Qu.:-0.6997 1st Qu.:-0.6828
## Median :-0.09267 Median :-0.1003 Median : 0.0193 Median :-0.1761
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.48963 3rd Qu.: 0.4333 3rd Qu.: 0.6720 3rd Qu.: 0.7087
## Max. : 8.18252 Max. : 8.6391 Max. : 4.3510 Max. : 8.0120
## protein carb fibre alcohol
## Min. :-3.8440 Min. :-3.34969 Min. :-2.3196 Min. :-2.55428
## 1st Qu.:-0.4969 1st Qu.:-0.70638 1st Qu.:-0.7074 1st Qu.:-0.66014
## Median : 0.1434 Median : 0.02254 Median :-0.1407 Median :-0.09026
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.6857 3rd Qu.: 0.71047 3rd Qu.: 0.5421 3rd Qu.: 0.50876
## Max. : 3.0311 Max. : 3.66477 Max. : 4.1792 Max. : 4.98947
mpsz.nb <- poly2nb(mpsz)
summary(mpsz.nb)
## Neighbour list object:
## Number of regions: 638
## Number of nonzero links: 3602
## Percentage nonzero weights: 0.8849166
## Average number of links: 5.645768
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1 10 32 89 166 167 114 41 12 4 1 1
## 1 least connected region:
## 298 with 1 link
## 1 most connected region:
## 249 with 12 links
Minimum spanning tree
lcosts <- nbcosts(mpsz.nb,sdat)
head(lcosts)
## [[1]]
## [1] 2.775024 2.799759 2.250071 3.158512 2.010419 2.387036
##
## [[2]]
## [1] 1.628270 2.389215 2.376214 2.537192 1.885154 2.461399 1.362063
##
## [[3]]
## [1] 2.3789039 1.3407767 0.9966116 2.3614112 4.0277109 3.3036025
##
## [[4]]
## [1] 4.408389 5.044217 4.879565 3.363022 3.997615 4.725135
##
## [[5]]
## [1] 1.628270 2.055111 2.057478 1.744264 2.053112 3.142689
##
## [[6]]
## [1] 2.775024 2.079000 1.377151 3.546190 1.561263 2.071944
mpsz.w <- nb2listw(mpsz.nb,lcosts,style="B")
summary(mpsz.w)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 638
## Number of nonzero links: 3602
## Percentage nonzero weights: 0.8849166
## Average number of links: 5.645768
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1 10 32 89 166 167 114 41 12 4 1 1
## 1 least connected region:
## 298 with 1 link
## 1 most connected region:
## 249 with 12 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 638 407044 8389.95 52258.83 552577.4
mpsz.mst <- mstree(mpsz.w)
class(mpsz.mst)
## [1] "mst" "matrix"
dim(mpsz.mst)
## [1] 637 3
head(mpsz.mst)
## [,1] [,2] [,3]
## [1,] 51 50 1.3565284
## [2,] 50 55 1.2130036
## [3,] 55 58 0.9583859
## [4,] 58 56 1.1724544
## [5,] 50 206 1.3853269
## [6,] 206 196 1.1872219
#plot(mpsz.mst,mpsz,col="blue",
# cex.lab=0.7)
#plot(mpsz,border=gray(.5),add=TRUE)
clusterGrp <- skater(mpsz.mst[,1:2],sdat,4)
str(clusterGrp)
## List of 8
## $ groups : num [1:638] 2 2 2 2 2 2 2 2 2 2 ...
## $ edges.groups:List of 5
## ..$ :List of 3
## .. ..$ node: num [1:18] 196 206 50 51 40 44 53 198 210 197 ...
## .. ..$ edge: num [1:17, 1:3] 210 25 198 197 53 210 205 51 51 50 ...
## .. ..$ ssw : num 34
## ..$ :List of 3
## .. ..$ node: num [1:77] 370 383 375 369 373 378 381 380 602 601 ...
## .. ..$ edge: num [1:76, 1:3] 598 597 600 227 590 608 601 217 230 223 ...
## .. ..$ ssw : num 183
## ..$ :List of 3
## .. ..$ node: num [1:69] 612 634 624 633 626 627 629 73 618 625 ...
## .. ..$ edge: num [1:68, 1:3] 626 627 634 612 73 422 629 624 630 62 ...
## .. ..$ ssw : num 142
## ..$ :List of 3
## .. ..$ node: num [1:371] 449 445 162 167 450 350 348 352 528 512 ...
## .. ..$ edge: num [1:370, 1:3] 112 106 122 275 258 274 103 104 267 118 ...
## .. ..$ ssw : num 889
## ..$ :List of 3
## .. ..$ node: num [1:103] 324 325 323 326 310 314 322 319 320 309 ...
## .. ..$ edge: num [1:102, 1:3] 565 342 567 151 159 562 160 156 336 155 ...
## .. ..$ ssw : num 187
## $ not.prune : NULL
## $ candidates : int [1:5] 1 2 3 4 5
## $ ssto : num 1641
## $ ssw : num [1:5] 1641 1572 1494 1463 1435
## $ crit : num [1:2] 1 Inf
## $ vec.crit : num [1:638] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "class")= chr "skater"
### the skater plot, using other colors
plot(clusterGrp, coordinates(as(mpsz, "Spatial")), cex.circles=0.035, cex.lab=.7,
groups.colors=heat.colors(length(clusterGrp$ed)))
plot(st_geometry(mpsz), col=heat.colors(length(clusterGrp$edg))[clusterGrp$groups])
#mpsz_test=mpsz
#mpsz_test['checkCol'] <- clus4$groups
mpsz['clusterNo'] <- clusterGrp$groups
ccs4 <- clusterGrp$groups
ccs4
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 4 4 4 4 4 4 1 4 4 4 1 4 4 4 4 4 4 4 4
## [38] 4 4 1 4 4 4 1 4 4 1 4 4 1 1 4 1 4 1 1 4 1 4 3 3 3 3 3 3 5 3 3 3 5 3 3 3 3
## [75] 5 3 5 3 5 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [112] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5
## [149] 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [186] 4 4 4 4 4 4 4 4 4 4 1 1 1 4 4 4 4 4 4 1 1 4 4 4 1 4 4 4 1 4 4 2 2 2 2 2 2
## [223] 2 2 2 2 2 2 2 2 2 2 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [260] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [297] 4 4 4 4 4 4 4 4 4 4 4 4 5 5 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [334] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 3 4 3 4 4 3 5 3 4 5 3 4 3 4 4 4 4 2 2 2
## [371] 2 2 2 2 2 4 2 2 2 2 2 2 2 2 4 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3
## [408] 3 3 3 3 4 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [445] 4 4 4 4 4 4 5 4 4 5 4 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [482] 4 4 4 4 4 4 4 4 4 4 4 4 2 2 4 4 2 4 2 2 2 4 4 2 4 2 4 4 4 4 4 4 4 4 4 4 4
## [519] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5
## [556] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 3 5 3 5 5 5 3 5 3 3 5 5 5 2 2 4 2
## [593] 4 2 2 2 2 2 2 2 2 2 2 4 2 2 4 2 4 4 3 3 3 3 3 5 3 3 3 3 5 5 3 3 3 3 3 3 3
## [630] 3 3 3 3 3 5 3 3 3
table(ccs4)
## ccs4
## 1 2 3 4 5
## 18 77 69 371 103
# mpsz$clusterNo <- as.factor(mpsz$clusterNo)
mpsz['clusterNo'] <- as.factor(clusterGrp$groups)
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(mpsz)+
tm_bubbles(col = "clusterNo",
size = 0.2,
border.col = "black",
border.lwd = 1)
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(mpsz)+
tm_polygons("clusterNo", title="Clusters", id="area_id") +
#tm_text("ward", size=0.5, scale=1.5) +
tm_format("World")
mapward['clusterNo'] <- as.factor(clusterGrp$groups)
ggparcoord(data = mapward,
columns = c(5:12),
groupColumn = 14,
scale = "uniminmax",
boxplot = TRUE,
title = "Parallel Coord. Teso Measures")
ggparcoord(data = mapward,
columns = c(5:12),
groupColumn = 14,
scale = "uniminmax",
boxplot = TRUE,
title = "Parallel Coord. Teso Measures") +
facet_wrap(~ clusterNo)
parcoords(
mapward[,5:12],
reorderable = T,
brushMode = '1D-axes')
`