packages = c('rgdal', 'spdep', 'maptools','seriation', 'dendextend', 'heatmaply', 'tidyverse','sf', 'tmap', 'tidyverse', 'GGally', 'plotly', 'parcoords')

for(p in packages){library
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p, character.only = T)
}
## Loading required package: rgdal
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
##  Path to GDAL shared files: /Users/Jufri/Library/R/3.6/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
##  Path to PROJ.4 shared files: /Users/Jufri/Library/R/3.6/library/rgdal/proj
##  Linking to sp version: 1.3-2
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
## Loading required package: maptools
## Checking rgeos availability: TRUE
## Loading required package: seriation
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust gclus
## Loading required package: dendextend
## 
## ---------------------
## Welcome to dendextend version 1.13.4
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
## Loading required package: heatmaply
## Loading required package: plotly
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Loading required package: viridis
## Loading required package: viridisLite
## 
## ======================
## Welcome to heatmaply version 1.1.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## Or contact: <tal.galili@gmail.com>
## ======================
## Loading required package: tidyverse
## ── Attaching packages ────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ✓ purrr   0.3.3
## ── Conflicts ───────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: tmap
## Loading required package: GGally
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
## Loading required package: parcoords
#require(devtools)
#install_version("ggplot2", version = "3.2.1", repos = "http://cran.us.r-project.org")
library(ggplot2)
#https://geodacenter.github.io/tutorials/spatial_cluster/skater.html
#https://cran.r-project.org/web/packages/spdep/spdep.pdf
#mpsz <- st_read(dsn = "data/London-wards-2018_ESRI/", 
 #               layer = "London_Ward")
#mpsz <- readOGR(dsn = "data/London-wards-2018_ESRI/", 
#                layer = "London_Ward")
mpsz <- st_read("data/wardmap.shp")
## Reading layer `wardmap' from data source `/Users/Jufri/OneDrive - Singapore Management University/ISSS608 - Visual Analytics and Applications/DataViz/Week09/Week09/data/wardmap.shp' using driver `ESRI Shapefile'
## Simple feature collection with 638 features and 4 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## epsg (SRID):    27700
## proj4string:    +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs
mpsz
## Simple feature collection with 638 features and 4 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## epsg (SRID):    27700
## proj4string:    +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs
## First 10 features:
##      area_id           ward   long_x    lat_x                       geometry
## 1  E05000026          Abbey 0.081277 51.53983 MULTIPOLYGON (((544395.7 18...
## 2  E05000027         Alibon 0.150987 51.54592 MULTIPOLYGON (((549604.1 18...
## 3  E05000028      Becontree 0.116912 51.55260 MULTIPOLYGON (((547563.4 18...
## 4  E05000029 Chadwell Heath 0.138596 51.58342 MULTIPOLYGON (((548881 1910...
## 5  E05000030      Eastbrook 0.173453 51.55519 MULTIPOLYGON (((551552.9 18...
## 6  E05000031       Eastbury 0.104756 51.53581 MULTIPOLYGON (((547271.2 18...
## 7  E05000032      Gascoigne 0.085689 51.53257 MULTIPOLYGON (((543996.8 18...
## 8  E05000033     Goresbrook 0.132641 51.53604 MULTIPOLYGON (((549072.8 18...
## 9  E05000034          Heath 0.152931 51.55869 MULTIPOLYGON (((550392.7 18...
## 10 E05000035     Longbridge 0.093644 51.54443 MULTIPOLYGON (((546185.3 18...
summary(mpsz)
##       area_id           ward         long_x             lat_x      
##  E05000026:  1   Abbey    :  2   Min.   :-0.48424   Min.   :51.30  
##  E05000027:  1   Alexandra:  2   1st Qu.:-0.22076   1st Qu.:51.45  
##  E05000028:  1   Barnhill :  2   Median :-0.11886   Median :51.51  
##  E05000029:  1   Belmont  :  2   Mean   :-0.12332   Mean   :51.50  
##  E05000030:  1   Edgware  :  2   3rd Qu.:-0.02146   3rd Qu.:51.56  
##  E05000031:  1   Fairfield:  2   Max.   : 0.27621   Max.   :51.68  
##  (Other)  :632   (Other)  :626                                     
##           geometry  
##  MULTIPOLYGON :638  
##  epsg:27700   :  0  
##  +proj=tmer...:  0  
##                     
##                     
##                     
## 
plot(st_geometry(mpsz),border=gray(0.5))

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(mpsz)+
tm_bubbles(col = "red",
           size = 0.2,
           border.col = "black",
           border.lwd = 1)
tesco <- read_csv("data/year_osward_grocery.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   area_id = col_character()
## )
## See spec(...) for full column specifications.
#row.names(tesco) <- tesco$area_id
row.names(tesco) <- tesco$area_id
## Warning: Setting row names on a tibble is deprecated.
tesco_measures <- select(tesco, c(1,18,26,34,42,50,58,66,74))
#names(tesco_measures)[names(tesco_measures) == "area_id"] <- "LAGSSCODE"
mapward <- right_join(mpsz, tesco_measures, 
            by = c("area_id" = "area_id"))
## Warning: Column `area_id` joining factor and character vector, coercing into
## character vector
vars <- c(5,6,7,8,9,10,11,12)
#dat <- select(mapward, vars)
#dat <- data.frame(mpsz_1@data[,vars])
dat <- data.frame(scale(as.data.frame(mapward)[,vars]))
summary(dat)
##       fat              saturate            salt             sugar        
##  Min.   :-4.01619   Min.   :-3.5942   Min.   :-2.7569   Min.   :-2.7651  
##  1st Qu.:-0.59329   1st Qu.:-0.5683   1st Qu.:-0.6997   1st Qu.:-0.6828  
##  Median :-0.09267   Median :-0.1003   Median : 0.0193   Median :-0.1761  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.48963   3rd Qu.: 0.4333   3rd Qu.: 0.6720   3rd Qu.: 0.7087  
##  Max.   : 8.18252   Max.   : 8.6391   Max.   : 4.3510   Max.   : 8.0120  
##     protein             carb              fibre            alcohol        
##  Min.   :-3.8440   Min.   :-3.34969   Min.   :-2.3196   Min.   :-2.55428  
##  1st Qu.:-0.4969   1st Qu.:-0.70638   1st Qu.:-0.7074   1st Qu.:-0.66014  
##  Median : 0.1434   Median : 0.02254   Median :-0.1407   Median :-0.09026  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.6857   3rd Qu.: 0.71047   3rd Qu.: 0.5421   3rd Qu.: 0.50876  
##  Max.   : 3.0311   Max.   : 3.66477   Max.   : 4.1792   Max.   : 4.98947
sdat <- scale(dat)
summary(sdat)
##       fat              saturate            salt             sugar        
##  Min.   :-4.01619   Min.   :-3.5942   Min.   :-2.7569   Min.   :-2.7651  
##  1st Qu.:-0.59329   1st Qu.:-0.5683   1st Qu.:-0.6997   1st Qu.:-0.6828  
##  Median :-0.09267   Median :-0.1003   Median : 0.0193   Median :-0.1761  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.48963   3rd Qu.: 0.4333   3rd Qu.: 0.6720   3rd Qu.: 0.7087  
##  Max.   : 8.18252   Max.   : 8.6391   Max.   : 4.3510   Max.   : 8.0120  
##     protein             carb              fibre            alcohol        
##  Min.   :-3.8440   Min.   :-3.34969   Min.   :-2.3196   Min.   :-2.55428  
##  1st Qu.:-0.4969   1st Qu.:-0.70638   1st Qu.:-0.7074   1st Qu.:-0.66014  
##  Median : 0.1434   Median : 0.02254   Median :-0.1407   Median :-0.09026  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.6857   3rd Qu.: 0.71047   3rd Qu.: 0.5421   3rd Qu.: 0.50876  
##  Max.   : 3.0311   Max.   : 3.66477   Max.   : 4.1792   Max.   : 4.98947
mpsz.nb <- poly2nb(mpsz)
summary(mpsz.nb)
## Neighbour list object:
## Number of regions: 638 
## Number of nonzero links: 3602 
## Percentage nonzero weights: 0.8849166 
## Average number of links: 5.645768 
## Link number distribution:
## 
##   1   2   3   4   5   6   7   8   9  10  11  12 
##   1  10  32  89 166 167 114  41  12   4   1   1 
## 1 least connected region:
## 298 with 1 link
## 1 most connected region:
## 249 with 12 links

Minimum spanning tree

lcosts <- nbcosts(mpsz.nb,sdat)
head(lcosts)
## [[1]]
## [1] 2.775024 2.799759 2.250071 3.158512 2.010419 2.387036
## 
## [[2]]
## [1] 1.628270 2.389215 2.376214 2.537192 1.885154 2.461399 1.362063
## 
## [[3]]
## [1] 2.3789039 1.3407767 0.9966116 2.3614112 4.0277109 3.3036025
## 
## [[4]]
## [1] 4.408389 5.044217 4.879565 3.363022 3.997615 4.725135
## 
## [[5]]
## [1] 1.628270 2.055111 2.057478 1.744264 2.053112 3.142689
## 
## [[6]]
## [1] 2.775024 2.079000 1.377151 3.546190 1.561263 2.071944
mpsz.w <- nb2listw(mpsz.nb,lcosts,style="B")
summary(mpsz.w)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 638 
## Number of nonzero links: 3602 
## Percentage nonzero weights: 0.8849166 
## Average number of links: 5.645768 
## Link number distribution:
## 
##   1   2   3   4   5   6   7   8   9  10  11  12 
##   1  10  32  89 166 167 114  41  12   4   1   1 
## 1 least connected region:
## 298 with 1 link
## 1 most connected region:
## 249 with 12 links
## 
## Weights style: B 
## Weights constants summary:
##     n     nn      S0       S1       S2
## B 638 407044 8389.95 52258.83 552577.4
mpsz.mst <- mstree(mpsz.w)
class(mpsz.mst)
## [1] "mst"    "matrix"
dim(mpsz.mst)
## [1] 637   3
head(mpsz.mst)
##      [,1] [,2]      [,3]
## [1,]   51   50 1.3565284
## [2,]   50   55 1.2130036
## [3,]   55   58 0.9583859
## [4,]   58   56 1.1724544
## [5,]   50  206 1.3853269
## [6,]  206  196 1.1872219
#plot(mpsz.mst,mpsz,col="blue",
#      cex.lab=0.7)
#plot(mpsz,border=gray(.5),add=TRUE)
clusterGrp <- skater(mpsz.mst[,1:2],sdat,4)
str(clusterGrp)
## List of 8
##  $ groups      : num [1:638] 2 2 2 2 2 2 2 2 2 2 ...
##  $ edges.groups:List of 5
##   ..$ :List of 3
##   .. ..$ node: num [1:18] 196 206 50 51 40 44 53 198 210 197 ...
##   .. ..$ edge: num [1:17, 1:3] 210 25 198 197 53 210 205 51 51 50 ...
##   .. ..$ ssw : num 34
##   ..$ :List of 3
##   .. ..$ node: num [1:77] 370 383 375 369 373 378 381 380 602 601 ...
##   .. ..$ edge: num [1:76, 1:3] 598 597 600 227 590 608 601 217 230 223 ...
##   .. ..$ ssw : num 183
##   ..$ :List of 3
##   .. ..$ node: num [1:69] 612 634 624 633 626 627 629 73 618 625 ...
##   .. ..$ edge: num [1:68, 1:3] 626 627 634 612 73 422 629 624 630 62 ...
##   .. ..$ ssw : num 142
##   ..$ :List of 3
##   .. ..$ node: num [1:371] 449 445 162 167 450 350 348 352 528 512 ...
##   .. ..$ edge: num [1:370, 1:3] 112 106 122 275 258 274 103 104 267 118 ...
##   .. ..$ ssw : num 889
##   ..$ :List of 3
##   .. ..$ node: num [1:103] 324 325 323 326 310 314 322 319 320 309 ...
##   .. ..$ edge: num [1:102, 1:3] 565 342 567 151 159 562 160 156 336 155 ...
##   .. ..$ ssw : num 187
##  $ not.prune   : NULL
##  $ candidates  : int [1:5] 1 2 3 4 5
##  $ ssto        : num 1641
##  $ ssw         : num [1:5] 1641 1572 1494 1463 1435
##  $ crit        : num [1:2] 1 Inf
##  $ vec.crit    : num [1:638] 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "class")= chr "skater"
### the skater plot, using other colors
plot(clusterGrp, coordinates(as(mpsz, "Spatial")), cex.circles=0.035, cex.lab=.7,
groups.colors=heat.colors(length(clusterGrp$ed)))

plot(st_geometry(mpsz), col=heat.colors(length(clusterGrp$edg))[clusterGrp$groups])

#mpsz_test=mpsz
#mpsz_test['checkCol'] <- clus4$groups
mpsz['clusterNo'] <- clusterGrp$groups
ccs4 <- clusterGrp$groups
ccs4
##   [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 4 4 4 4 4 4 1 4 4 4 1 4 4 4 4 4 4 4 4
##  [38] 4 4 1 4 4 4 1 4 4 1 4 4 1 1 4 1 4 1 1 4 1 4 3 3 3 3 3 3 5 3 3 3 5 3 3 3 3
##  [75] 5 3 5 3 5 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [112] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5
## [149] 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [186] 4 4 4 4 4 4 4 4 4 4 1 1 1 4 4 4 4 4 4 1 1 4 4 4 1 4 4 4 1 4 4 2 2 2 2 2 2
## [223] 2 2 2 2 2 2 2 2 2 2 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [260] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [297] 4 4 4 4 4 4 4 4 4 4 4 4 5 5 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [334] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 3 4 3 4 4 3 5 3 4 5 3 4 3 4 4 4 4 2 2 2
## [371] 2 2 2 2 2 4 2 2 2 2 2 2 2 2 4 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3
## [408] 3 3 3 3 4 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [445] 4 4 4 4 4 4 5 4 4 5 4 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [482] 4 4 4 4 4 4 4 4 4 4 4 4 2 2 4 4 2 4 2 2 2 4 4 2 4 2 4 4 4 4 4 4 4 4 4 4 4
## [519] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5
## [556] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 3 5 3 5 5 5 3 5 3 3 5 5 5 2 2 4 2
## [593] 4 2 2 2 2 2 2 2 2 2 2 4 2 2 4 2 4 4 3 3 3 3 3 5 3 3 3 3 5 5 3 3 3 3 3 3 3
## [630] 3 3 3 3 3 5 3 3 3
table(ccs4)
## ccs4
##   1   2   3   4   5 
##  18  77  69 371 103
# mpsz$clusterNo <- as.factor(mpsz$clusterNo)
mpsz['clusterNo'] <- as.factor(clusterGrp$groups)
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(mpsz)+
tm_bubbles(col = "clusterNo",
           size = 0.2,
           border.col = "black",
           border.lwd = 1)
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(mpsz)+
    tm_polygons("clusterNo", title="Clusters", id="area_id") +
    #tm_text("ward", size=0.5, scale=1.5) +
    tm_format("World")
mapward['clusterNo'] <- as.factor(clusterGrp$groups)
ggparcoord(data = mapward, 
           columns = c(5:12), 
           groupColumn = 14,
           scale = "uniminmax", 
           boxplot = TRUE, 
           title = "Parallel Coord. Teso Measures")

ggparcoord(data = mapward, 
           columns = c(5:12), 
           groupColumn = 14,
           scale = "uniminmax", 
           boxplot = TRUE, 
           title = "Parallel Coord. Teso Measures") +
  facet_wrap(~ clusterNo)

parcoords(
 mapward[,5:12],
 reorderable = T,
 brushMode = '1D-axes')

`