Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
Objective
This visualisation was intended to convert government-provided list-format data into a more easily understood Local Government Area (LGA) map format for the general public of Greater Sydney affected by the COVID-19 outbreak. The visualisation includes two layers of information: (1) the number of COVID-19 cases identified in each LGA, indicated by a colour scale; and (2) the locations and names of each cluster grouping, indicated by labels.
The visualisation chosen had the following three main issues:
Reference
The following code was used to fix the issues identified in the original.
# Load Libraries
library(ggplot2)
library(dplyr)
library(rgeos)
library(maptools)
library(ggmap)
library(broom)
library(mapproj)
library(ggnewscale)
library(grid)
# Read the NSW LGA boundary polygons from the shapefile
nsw.lga.shp <- maptools::readShapeSpatial(fn = "NSW_LGA_POLYGON_shp")
# Inspect the imported object: its class first
class(x = nsw.lga.shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# ... then the attribute columns attached to the polygons
names(x = nsw.lga.shp)
## [1] "LG_PLY_PID" "DT_CREATE" "DT_RETIRE" "LGA_PID" "NSW_LGA_sh"
## [6] "NSW_LGA__1" "NSW_LGA__2" "NSW_LGA__3" "NSW_LGA__4" "NSW_LGA__5"
# ... and the column holding the upper-case LGA names
head(x = nsw.lga.shp$NSW_LGA__3)
## [1] UNINCORPORATED UNINCORPORATED UNINCORPORATED UNINCORPORATED UNINCORPORATED
## [6] UNINCORPORATED
## 129 Levels: ALBURY ARMIDALE REGIONAL BALLINA BALRANALD ... YASS VALLEY
# Download the NSW COVID-19 case notification records as CSV
cases_url <- "https://data.nsw.gov.au/data/datastore/dump/2776dbb8-f807-4fb2-b1ed-184a6fc2c8aa?bom=True"
cases <- read.csv(file = cases_url)
# Preview the first rows of the raw download
head(x = cases)
## ï..notification_date postcode likely_source_of_infection lhd_2010_code
## 1 2020-01-25 2134 Overseas X700
## 2 2020-01-25 2121 Overseas X760
## 3 2020-01-25 2071 Overseas X760
## 4 2020-01-27 2033 Overseas X720
## 5 2020-03-01 2077 Overseas X760
## 6 2020-03-01 2163 Overseas X710
## lhd_2010_name lga_code19 lga_name19
## 1 Sydney 11300 Burwood (A)
## 2 Northern Sydney 16260 Parramatta (C)
## 3 Northern Sydney 14500 Ku-ring-gai (A)
## 4 South Eastern Sydney 16550 Randwick (C)
## 5 Northern Sydney 14000 Hornsby (A)
## 6 South Western Sydney 12850 Fairfield (C)
# Keep only the notification date, postcode and LGA name columns, renaming
# them while selecting.
# The date column is picked by position: the byte-order mark requested by the
# download URL ("bom=True") gets fused into its header, and the resulting
# mangled name (e.g. "ï..notification_date") differs between platforms and
# locales, so matching on that literal name is not portable.
cases_ltd <- cases %>%
  select(notification_date = 1, postcode, lga_name = lga_name19)
# Normalise LGA names so they can be matched against the upper-case names in
# the shapefile: upper-case them, then drop every trailing bracketed
# qualifier, e.g. "Burwood (A)" -> "BURWOOD" and
# "Campbelltown (C) (NSW)" -> "CAMPBELLTOWN". A fixed 4-character trim would
# leave residue such as "CAMPBELLTOWN (C) (" on doubly-qualified names.
cases_ltd$lga_name <- toupper(cases_ltd$lga_name)
cases_ltd$lga_name <- sub("(\\s*\\([^()]*\\))+$", "", cases_ltd$lga_name)
# Keep notifications dated strictly after 2021-06-15 and strictly before
# 2021-07-10, the period covered by the original visualisation. The dates are
# compared as text; rows where the comparison is NA are dropped.
in_window <- cases_ltd$notification_date > "2021-06-15" &
  cases_ltd$notification_date < "2021-07-10"
cases_df <- cases_ltd[in_window & !is.na(in_window), ]
# Count the case records held against each LGA name
cases_df[["lga_name"]] <- as.factor(cases_df[["lga_name"]])
cases_lgas <- cases_df %>%
  count(vars = lga_name) %>%
  mutate(lga_name = vars)
head(cases_lgas)
## vars n lga_name
## 1 32
## 2 BALLINA 1 BALLINA
## 3 BAYSIDE 23 BAYSIDE
## 4 BLACKTOWN 8 BLACKTOWN
## 5 BURWOOD 6 BURWOOD
## 6 CAMDEN 7 CAMDEN
# Flatten the spatial polygons into a table of boundary points, using the
# LGA name column as the region id
lga_shp <- nsw.lga.shp %>%
  tidy(region = "NSW_LGA__3")
head(x = lga_shp)
## # A tibble: 6 x 7
## long lat order hole piece group id
## <dbl> <dbl> <int> <lgl> <fct> <fct> <chr>
## 1 147. -36.0 1 FALSE 1 ALBURY.1 ALBURY
## 2 147. -36.0 2 FALSE 1 ALBURY.1 ALBURY
## 3 147. -36.0 3 FALSE 1 ALBURY.1 ALBURY
## 4 147. -36.0 4 FALSE 1 ALBURY.1 ALBURY
## 5 147. -36.0 5 FALSE 1 ALBURY.1 ALBURY
## 6 147. -36.0 6 FALSE 1 ALBURY.1 ALBURY
# Expose the region id under the key name shared with the case counts
lga_shp[["lga_name"]] <- lga_shp[["id"]]
# head(lga_shp)
# Drop the polygons whose LGA name is UNINCORPORATED
is_incorporated <- lga_shp[["lga_name"]] != "UNINCORPORATED"
land_only <- lga_shp[is_incorporated, ]
head(x = land_only)
## # A tibble: 6 x 8
## long lat order hole piece group id lga_name
## <dbl> <dbl> <int> <lgl> <fct> <fct> <chr> <chr>
## 1 147. -36.0 1 FALSE 1 ALBURY.1 ALBURY ALBURY
## 2 147. -36.0 2 FALSE 1 ALBURY.1 ALBURY ALBURY
## 3 147. -36.0 3 FALSE 1 ALBURY.1 ALBURY ALBURY
## 4 147. -36.0 4 FALSE 1 ALBURY.1 ALBURY ALBURY
## 5 147. -36.0 5 FALSE 1 ALBURY.1 ALBURY ALBURY
## 6 147. -36.0 6 FALSE 1 ALBURY.1 ALBURY ALBURY
# Left-join the per-LGA case counts onto the polygon points by LGA name,
# keeping every point even where no count exists for its LGA
merge_profiles <- merge(
  x = land_only,
  y = cases_lgas,
  by = "lga_name",
  all.x = TRUE
)
# Put the rows back into ascending `order` so the polygons are drawn correctly
vertex_sequence <- order(merge_profiles$order)
choro_df <- merge_profiles[vertex_sequence, ]
# Flag the rows of choro_df that belong to a Greater Sydney LGA.
# gr_syd is a logical vector aligned with the rows of choro_df; it does not
# drop any rows by itself and must be used to subset choro_df where needed.
# Names must match the upper-case shapefile names exactly ("WOOLLAHRA" has a
# double L; a misspelt entry would silently leave that LGA unflagged).
gr_syd <- choro_df$lga_name %in% c(
  "BAYSIDE", "BLACKTOWN", "BURWOOD", "CANADA BAY", "CANTERBURY-BANKSTOWN",
  "CUMBERLAND", "FAIRFIELD", "GEORGES RIVER", "HORNSBY", "HUNTERS HILL",
  "INNER WEST", "KU-RING-GAI", "LANE COVE", "LIVERPOOL", "MOSMAN",
  "NORTH SYDNEY", "NORTHERN BEACHES", "PARRAMATTA", "PENRITH", "RANDWICK",
  "RYDE", "STRATHFIELD", "SUTHERLAND SHIRE", "SYDNEY", "THE HILLS SHIRE",
  "WAVERLEY", "WILLOUGHBY", "WOOLLAHRA"
)
head(choro_df)
## lga_name long lat order hole piece group id vars n
## 1 ALBURY 147.0972 -36.03922 1 FALSE 1 ALBURY.1 ALBURY <NA> NA
## 2 ALBURY 147.0971 -36.03934 2 FALSE 1 ALBURY.1 ALBURY <NA> NA
## 3 ALBURY 147.0969 -36.03958 3 FALSE 1 ALBURY.1 ALBURY <NA> NA
## 4 ALBURY 147.0969 -36.03975 4 FALSE 1 ALBURY.1 ALBURY <NA> NA
## 5 ALBURY 147.0970 -36.03996 5 FALSE 1 ALBURY.1 ALBURY <NA> NA
## 6 ALBURY 147.0973 -36.04028 6 FALSE 1 ALBURY.1 ALBURY <NA> NA
# Manually compiled cluster overlay: name, case count and coordinates of each
# named cluster. The vectors are parallel (element i describes cluster i).
location <- c(
  "Bondi Westfield", "Joh Bailey", "Birthday Party", "Great Ocean Foods",
  "Lyfe Café", "Crossways Hotel", "Club Marconi", "Primary School",
  "Meriton Suites", "Commonwealth Bank"
)
case_count <- c(21, 21, 48, 32, 45, 19, 3, 2, 28, 4)
lat <- c(
  -33.891565, -33.8770154, -33.931387, -33.9110633, -33.8886363,
  -33.8918695, -33.8646654, -33.9332882, -33.8981776, -33.9342916
)
long <- c(
  151.2483125, 151.2409806, 150.8165791, 151.1631366, 151.2688395,
  151.0809533, 150.8781989, 151.2518961, 151.2130283, 151.0673081
)
id <- seq_along(location)
# Build each "<location> (<n> cases)" label exactly once and derive both the
# colour lookup and the factor levels from it, so editing a name or a count
# above can never leave the labels, levels and colour names out of sync.
cluster_labels <- paste0(location, " (", case_count, " cases)")
# Named colour vector: label -> "grey0", "grey1", ... in cluster order
mapcol <- setNames(
  paste0("grey", seq_along(cluster_labels) - 1L),
  cluster_labels
)
cluster_df <- data.frame(id, location, case_count, long, lat)
cluster_df$id <- as.factor(cluster_df$id)
# Ordered factor whose level order follows the order the clusters are listed
cluster_df$group_label <- factor(
  cluster_labels,
  levels = cluster_labels,
  ordered = TRUE
)
cluster_df
## id location case_count long lat
## 1 1 Bondi Westfield 21 151.2483 -33.89156
## 2 2 Joh Bailey 21 151.2410 -33.87702
## 3 3 Birthday Party 48 150.8166 -33.93139
## 4 4 Great Ocean Foods 32 151.1631 -33.91106
## 5 5 Lyfe Café 45 151.2688 -33.88864
## 6 6 Crossways Hotel 19 151.0810 -33.89187
## 7 7 Club Marconi 3 150.8782 -33.86467
## 8 8 Primary School 2 151.2519 -33.93329
## 9 9 Meriton Suites 28 151.2130 -33.89818
## 10 10 Commonwealth Bank 4 151.0673 -33.93429
## group_label
## 1 Bondi Westfield (21 cases)
## 2 Joh Bailey (21 cases)
## 3 Birthday Party (48 cases)
## 4 Great Ocean Foods (32 cases)
## 5 Lyfe Café (45 cases)
## 6 Crossways Hotel (19 cases)
## 7 Club Marconi (3 cases)
## 8 Primary School (2 cases)
## 9 Meriton Suites (28 cases)
## 10 Commonwealth Bank (4 cases)
Data Reference
NSW Government 2021, COVID-19 cases by notification date, location and likely source of infection, Data.NSW, NSW Government, Retrieved July 24, 2021 from Data.NSW Government website: https://data.nsw.gov.au/data/dataset/nsw-covid-19-cases-by-location-and-likely-source-of-infection/resource/2776dbb8-f807-4fb2-b1ed-184a6fc2c8aa?inner_span=True
Cluster naming data manually gathered from official media releases from NSW Health via NSW Health 2021, Media Releases, NSW Health, accessed July 24, 2021: https://www.health.nsw.gov.au/news/Pages/2021-nsw-health.aspx
Australian Government 2021, NSW_LGA_POLYGON.shp, data.gov.au, Australian Government, Retrieved July 24, 2021 from Australian Government website: https://data.gov.au/data/dataset/nsw-local-government-areas/resource/acd0b143-3616-4144-9ef5-d83a67f84148
The following plot fixes the main issues in the original.