Load packages

packages <- c("tidyverse","plotly")
for (p in packages){
  if(!require (p, character.only = T)){
    install.packages(p)
  }
  library(p, character.only = T)
}

Data preparation

  1. get selected variables: PA, AG, Sex, Pop, Time
table1 = read.csv("respopagesextod2000to2010.csv")
table2 = read.csv("respopagesextod2011to2019.csv")
table1 = table1[,c(1,3,4,6,7)]
table2 = table2[,c(1,3,4,6,7)]
  1. sum population by PA,AG,SEX,POP to reduce records
table1 <- table1 %>%
  group_by(PA,AG,Time)%>%
  summarise(pop=sum(Pop))
table2 <- table2 %>%
  group_by(PA,AG,Time)%>%
  summarise(pop=sum(Pop))
  1. concatenate two tables
data <- union_all(table1,table2)
glimpse(data)
## Observations: 20,900
## Variables: 4
## Groups: PA, AG [1,083]
## $ PA   <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang M...
## $ AG   <fct> 0_to_4, 0_to_4, 0_to_4, 0_to_4, 0_to_4, 0_to_4, 0_to_4, 0_to_4...
## $ Time <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 20...
## $ pop  <int> 9290, 8950, 8690, 8370, 7980, 7690, 7490, 7590, 7830, 8310, 80...
  1. get the total population within each year, each year
age_total <- data %>%
  group_by(PA,Time)%>%
  summarise(pop=sum(pop))
  1. calculate the percentage of population within each planning area and year
df = merge(x = data, y = age_total, by = c("PA","Time"), all.x = TRUE)
df$Proportion = df$pop.x/df$pop.y*100
  1. sort age groups from young to elder
df$AG = factor(df$AG)
levels(df$AG) <- c("0_to_4","5_to_9","10_to_14","15_to_19","20_to_24",
                   "25_to_29","30_to_34","35_to_39","40_to_44","45_to_49",
                   "50_to_54","55_to_59","60_to_64","65_to_69","70_to_74",
                   "75_to_79","80_to_84","85_to_89","90_and_over")

Build the visualization

# Rename axis and remove grid lines
x <- list(
  title = "Planning Area",
  showgrid = F
)

y <- list(
  title = "Age group",
  showgrid = F
)


df[df$Time==2019,] %>%
  plot_ly(z=~Proportion, y=~AG, x=~PA, type = "heatmap", colors = "Blues",
          hovertemplate = paste('<b>Planning area: </br> %{x}',
                        '<br><b>Age group</b>: %{y}<b>',
                        '<br><b>AG%</b>: %{z:.2f}%<br>')) %>%
  layout(title = "Age structure of Singapore population by planning area in 2019", 
         width=1200,
         height=800,
         xaxis=x,
         yaxis=y
         )