Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.

Original


Source: - Anonymous, @dbRaevn 2021, NSW cases by location - 9th July 2021, Twitter.


Objective

This visualisation was intended to convert government-provided list-format data into a more easily understood Local Government Area (LGA) map for the general public of Greater Sydney affected by the COVID-19 outbreak. The visualisation includes two layers of information: (1) the number of COVID-19 cases identified in each LGA, indicated by a colour scale; and (2) the locations and names of each cluster grouping, indicated by labels.

The visualisation chosen had the following three main issues:

  • The colour scheme runs counter to intuitive colour mapping: in familiar maps such as weather data, darker red areas are read as “hotter”, whereas this visualisation uses brighter red for higher case counts.
  • The cluster label leader lines and points are difficult to read and clutter the underlying map.
  • The cluster data label text is not legible.

Reference

Code

The following code was used to fix the issues identified in the original.

# Load Libraries
library(ggplot2)
library(dplyr)
library(rgeos)
library(maptools)
library(ggmap)
library(broom)
library(mapproj)
library(ggnewscale)
library(grid)
# Import shape file
# Reads the NSW LGA polygon layer into `nsw.lga.shp` using
# readShapeSpatial() (maptools is loaded at the top of the script).
# NOTE(review): the path has no ".shp" extension -- presumably the layer's
# base name relative to the working directory; confirm it resolves.
nsw.lga.shp <- readShapeSpatial("NSW_LGA_POLYGON_shp")

# Review the shape file
# class() shows which sp class was returned (see the output below).
class(nsw.lga.shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# names() lists the attribute columns attached to the polygons.
names(nsw.lga.shp)
##  [1] "LG_PLY_PID" "DT_CREATE"  "DT_RETIRE"  "LGA_PID"    "NSW_LGA_sh"
##  [6] "NSW_LGA__1" "NSW_LGA__2" "NSW_LGA__3" "NSW_LGA__4" "NSW_LGA__5"
# NSW_LGA__3 is the column later passed to tidy() as the region key; peek at
# its first values to see what the LGA identifiers look like.
head(nsw.lga.shp$NSW_LGA__3)
## [1] UNINCORPORATED UNINCORPORATED UNINCORPORATED UNINCORPORATED UNINCORPORATED
## [6] UNINCORPORATED
## 129 Levels: ALBURY ARMIDALE REGIONAL BALLINA BALRANALD ... YASS VALLEY
# Import case numbers dataset
# The CSV is downloaded from data.nsw.gov.au every time the script runs, so
# the contents of `cases` depend on the state of the remote dataset (and on
# network access) at run time.
# NOTE(review): in the printed output the first header appears as
# "ï..notification_date" -- presumably the byte-order mark requested by
# `bom=True` being read as ordinary characters; confirm on your platform.
cases <- read.csv("https://data.nsw.gov.au/data/datastore/dump/2776dbb8-f807-4fb2-b1ed-184a6fc2c8aa?bom=True")

# Review the case numbers dataset
# Show the first rows to check the column names and value formats.
head(cases)
##   ï..notification_date postcode likely_source_of_infection lhd_2010_code
## 1           2020-01-25     2134                   Overseas          X700
## 2           2020-01-25     2121                   Overseas          X760
## 3           2020-01-25     2071                   Overseas          X760
## 4           2020-01-27     2033                   Overseas          X720
## 5           2020-03-01     2077                   Overseas          X760
## 6           2020-03-01     2163                   Overseas          X710
##          lhd_2010_name lga_code19      lga_name19
## 1               Sydney      11300     Burwood (A)
## 2      Northern Sydney      16260  Parramatta (C)
## 3      Northern Sydney      14500 Ku-ring-gai (A)
## 4 South Eastern Sydney      16550    Randwick (C)
## 5      Northern Sydney      14000     Hornsby (A)
## 6 South Western Sydney      12850   Fairfield (C)
# Reduce to required columns
# Keep columns 1, 2 and 7 (notification date, postcode, LGA name) and name
# the first and last by position. The first header can be mangled by a
# byte-order mark in a locale-dependent way (it prints as
# "ï..notification_date" in the review output), so matching on that literal
# text would silently skip the rename wherever the mangling differs.
cases_ltd <- cases[, c(1, 2, 7)]
names(cases_ltd)[c(1, 3)] <- c("notification_date", "lga_name")

# Normalise LGA names so they can be matched against the upper-case names in
# the shape file: upper-case them, then strip any trailing parenthesised
# suffix such as " (A)" or " (C)". A pattern is used rather than chopping a
# fixed four characters, so names with no suffix, or with a longer or second
# suffix (e.g. "... (C) (NSW)"), are not corrupted.
cases_ltd$lga_name <- toupper(cases_ltd$lga_name)
cases_ltd$lga_name <- trimws(
  sub("([[:space:]]*\\([^()]*\\))+$", "", cases_ltd$lga_name)
)


# Restrict the notifications to the period covered by the original
# visualisation: dated strictly after 2021-06-15 and strictly before
# 2021-07-10. The comparison is made on the date strings as stored; rows
# where it evaluates to NA are dropped.
cases_df <- cases_ltd[
  which(
    cases_ltd$notification_date > "2021-06-15" &
      cases_ltd$notification_date < "2021-07-10"
  ),
  ,
  drop = FALSE
]

# Count notifications per LGA. The count is keyed on a column called `vars`,
# which is then duplicated as `lga_name` so the result carries the join key.
cases_df$lga_name <- as.factor(cases_df$lga_name)
cases_lgas <- cases_df %>%
  count(vars = lga_name) %>%
  mutate(lga_name = vars)

cases_lgas %>% head()
##        vars  n  lga_name
## 1           32          
## 2   BALLINA  1   BALLINA
## 3   BAYSIDE 23   BAYSIDE
## 4 BLACKTOWN  8 BLACKTOWN
## 5   BURWOOD  6   BURWOOD
## 6    CAMDEN  7    CAMDEN
# Flatten the spatial polygons into a long table of boundary vertices,
# using the NSW_LGA__3 attribute as the region identifier
lga_shp <- nsw.lga.shp %>%
  tidy(region = "NSW_LGA__3")
lga_shp %>% head()
## # A tibble: 6 x 7
##    long   lat order hole  piece group    id    
##   <dbl> <dbl> <int> <lgl> <fct> <fct>    <chr> 
## 1  147. -36.0     1 FALSE 1     ALBURY.1 ALBURY
## 2  147. -36.0     2 FALSE 1     ALBURY.1 ALBURY
## 3  147. -36.0     3 FALSE 1     ALBURY.1 ALBURY
## 4  147. -36.0     4 FALSE 1     ALBURY.1 ALBURY
## 5  147. -36.0     5 FALSE 1     ALBURY.1 ALBURY
## 6  147. -36.0     6 FALSE 1     ALBURY.1 ALBURY
# Expose the region identifier under `lga_name`, the same key name carried
# by the case counts, so the two tables can be merged on it
lga_shp[["lga_name"]] <- lga_shp[["id"]]

# Keep every polygon vertex except those belonging to UNINCORPORATED areas
land_only <- lga_shp[lga_shp[["lga_name"]] != "UNINCORPORATED", ]
land_only %>% head()
## # A tibble: 6 x 8
##    long   lat order hole  piece group    id     lga_name
##   <dbl> <dbl> <int> <lgl> <fct> <fct>    <chr>  <chr>   
## 1  147. -36.0     1 FALSE 1     ALBURY.1 ALBURY ALBURY  
## 2  147. -36.0     2 FALSE 1     ALBURY.1 ALBURY ALBURY  
## 3  147. -36.0     3 FALSE 1     ALBURY.1 ALBURY ALBURY  
## 4  147. -36.0     4 FALSE 1     ALBURY.1 ALBURY ALBURY  
## 5  147. -36.0     5 FALSE 1     ALBURY.1 ALBURY ALBURY  
## 6  147. -36.0     6 FALSE 1     ALBURY.1 ALBURY ALBURY
# Attach the per-LGA case counts to the polygon vertices. all.x = TRUE keeps
# every vertex row, leaving NA counts for LGAs with no matching case record.
merge_profiles <- merge(
  x = land_only,
  y = cases_lgas,
  by = "lga_name",
  all.x = TRUE
)

# merge() re-sorts rows by the key, so restore the vertex drawing sequence
# held in the `order` column before the polygons are plotted
choro_df <- merge_profiles[order(merge_profiles[["order"]]), , drop = FALSE]

# Remove excess LGAs to show Greater Sydney only
# `gr_syd` is a logical vector with one entry per row of `choro_df`: TRUE
# where the row's LGA is one of the Greater Sydney LGAs listed below. It only
# flags rows; nothing is subset in this step.
# Fix: the last entry was spelled "WOOLAHRA", which can never match the LGA
# name WOOLLAHRA, so that LGA was always excluded from the mask.
gr_syd <- choro_df$lga_name %in% c(
  "BAYSIDE", "BLACKTOWN", "BURWOOD", "CANADA BAY", "CANTERBURY-BANKSTOWN",
  "CUMBERLAND", "FAIRFIELD", "GEORGES RIVER", "HORNSBY", "HUNTERS HILL",
  "INNER WEST", "KU-RING-GAI", "LANE COVE", "LIVERPOOL", "MOSMAN",
  "NORTH SYDNEY", "NORTHERN BEACHES", "PARRAMATTA", "PENRITH", "RANDWICK",
  "RYDE", "STRATHFIELD", "SUTHERLAND SHIRE", "SYDNEY", "THE HILLS SHIRE",
  "WAVERLEY", "WILLOUGHBY", "WOOLLAHRA"
)

head(choro_df)
##   lga_name     long       lat order  hole piece    group     id vars  n
## 1   ALBURY 147.0972 -36.03922     1 FALSE     1 ALBURY.1 ALBURY <NA> NA
## 2   ALBURY 147.0971 -36.03934     2 FALSE     1 ALBURY.1 ALBURY <NA> NA
## 3   ALBURY 147.0969 -36.03958     3 FALSE     1 ALBURY.1 ALBURY <NA> NA
## 4   ALBURY 147.0969 -36.03975     4 FALSE     1 ALBURY.1 ALBURY <NA> NA
## 5   ALBURY 147.0970 -36.03996     5 FALSE     1 ALBURY.1 ALBURY <NA> NA
## 6   ALBURY 147.0973 -36.04028     6 FALSE     1 ALBURY.1 ALBURY <NA> NA
# Add manually input data overlay (cluster names, case numbers & locations)
# Each position across the vectors below describes one cluster: a running
# id, its display name, its case count and its latitude/longitude.
id <- 1:10
location <- c(
  "Bondi Westfield", "Joh Bailey", "Birthday Party", "Great Ocean Foods",
  "Lyfe Café", "Crossways Hotel", "Club Marconi", "Primary School",
  "Meriton Suites", "Commonwealth Bank"
)
case_count <- c(21, 21, 48, 32, 45, 19, 3, 2, 28, 4)
lat <- c(
  -33.891565, -33.8770154, -33.931387, -33.9110633, -33.8886363,
  -33.8918695, -33.8646654, -33.9332882, -33.8981776, -33.9342916
)
long <- c(
  151.2483125, 151.2409806, 150.8165791, 151.1631366, 151.2688395,
  151.0809533, 150.8781989, 151.2518961, 151.2130283, 151.0673081
)

cluster_df <- data.frame(id, location, case_count, long, lat)
cluster_df$id <- as.factor(cluster_df$id)

# Build each "<location> (<n> cases)" label once and reuse it for both the
# factor levels and the colour-map names. Previously the ten labels were also
# typed out by hand in two other places, so editing a name or count could
# silently produce NA factor levels or colour keys that no longer matched.
cluster_df$group_label <- paste0(
  cluster_df$location, " (", cluster_df$case_count, " cases)"
)
# Ordered factor whose level order follows the row order above
cluster_df$group_label <- factor(
  cluster_df$group_label,
  levels = unique(cluster_df$group_label),
  ordered = TRUE
)

# One grey shade per label ("grey0", "grey1", ...), named by the label
mapcol <- setNames(
  paste0("grey", seq_along(levels(cluster_df$group_label)) - 1L),
  levels(cluster_df$group_label)
)

cluster_df
##    id          location case_count     long       lat
## 1   1   Bondi Westfield         21 151.2483 -33.89156
## 2   2        Joh Bailey         21 151.2410 -33.87702
## 3   3    Birthday Party         48 150.8166 -33.93139
## 4   4 Great Ocean Foods         32 151.1631 -33.91106
## 5   5         Lyfe Café         45 151.2688 -33.88864
## 6   6   Crossways Hotel         19 151.0810 -33.89187
## 7   7      Club Marconi          3 150.8782 -33.86467
## 8   8    Primary School          2 151.2519 -33.93329
## 9   9    Meriton Suites         28 151.2130 -33.89818
## 10 10 Commonwealth Bank          4 151.0673 -33.93429
##                     group_label
## 1    Bondi Westfield (21 cases)
## 2         Joh Bailey (21 cases)
## 3     Birthday Party (48 cases)
## 4  Great Ocean Foods (32 cases)
## 5          Lyfe Café (45 cases)
## 6    Crossways Hotel (19 cases)
## 7        Club Marconi (3 cases)
## 8      Primary School (2 cases)
## 9     Meriton Suites (28 cases)
## 10  Commonwealth Bank (4 cases)

Data Reference

Reconstruction

The following plot fixes the main issues in the original.