Solution Sketch
library(tidyverse)
library(sf)
library(tmap)
library(DT)
data = read_csv("data/2011_2020data.csv")
#get geospatial data
mpsz <- st_read(dsn = "data/geospatial",
layer = "MP14_SUBZONE_WEB_PL")
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `C:\Users\Junwe\Desktop\4.2\VA\assignment4\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## projected CRS: SVY21
#turn age group into sortable factors
data$AG <- factor(data$AG,levels = c("0_to_4",
"5_to_9",
"10_to_14",
"15_to_19",
"20_to_24",
"25_to_29",
"30_to_34",
"35_to_39",
"40_to_44",
"45_to_49",
"50_to_54",
"55_to_59",
"60_to_64",
"65_to_69",
"70_to_74",
"75_to_79",
"80_to_84",
"85_to_89",
"90_and_over"))
#Dataset for 2020
data2020 = data %>%
filter(Time == 2020)
#join mapdata
for_map = data2020 %>%
select(PA,Pop) %>%
mutate(PA = toupper(PA)) %>%
group_by(PA) %>%
summarise('Pop' = sum(Pop)/1000)
data2020_map <- left_join( mpsz,for_map,
by = c( "PLN_AREA_N" = "PA" ))
Pop_by_PA <- tm_shape(data2020_map)+
tm_fill("Pop",
palette = "Blues",
legend.hist = TRUE,
legend.is.portrait = TRUE,
legend.hist.z = 0.1,
title = "Count")+
tm_layout(main.title = "Distribution of Population Planning Area 2020 (Thousands)",
legend.height = 0.45,
legend.width = 0.35,
legend.outside = FALSE,
legend.position = c("right", "bottom"),
frame = FALSE)
Pop_by_PA
pop_by_age_data <- data2020 %>%
select(AG,Pop) %>%
group_by(AG) %>%
summarise('Pop' = sum(Pop)/1000) %>%
arrange(AG)
pop_by_age <- ggplot(pop_by_age_data, aes(x = AG, y = Pop)) +
geom_bar(stat='identity',fill="seagreen") +
labs(x = "Age Group", y = "Population(per thousand)", title = "Population by Age Group 2020") +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45))
pop_by_age_sex_data <- data2020 %>%
select(AG,Sex,Pop) %>%
group_by(AG,Sex) %>%
summarise('Pop' = sum(Pop)/1000) %>%
mutate(Pop = if_else(Sex == "Males", -Pop, Pop))
females_pop <-
pop_by_age_sex_data %>%
filter(Sex == "Females") %>%
arrange(AG)
the_order <- females_pop$AG
pop_by_age_sex <- ggplot(pop_by_age_sex_data, aes(x = AG, y = Pop, fill = Sex)) +
geom_bar(stat='identity') +
coord_flip() +
scale_x_discrete(limits = the_order) +
scale_y_continuous(breaks = seq(-200, 200, 50),
labels = abs(seq(-200, 200, 50))) +
labs(x = "Age Group", y = "Population(per thousand)", title = "Population by Age by Gender 2020") +
theme(legend.position = "bottom",
legend.title = element_blank(),
plot.title = element_text(hjust = 0.5),
panel.background = element_rect(fill = "grey90")) +
scale_fill_manual(values=c( "deepskyblue1","hotpink1"),
name="",
breaks=c("Males", "Females"),
labels=c("Males", "Female"))
pop_by_age_sex
pop_by_age
pop_by_TOD_PA_data <- data %>%
select(PA,TOD,Pop) %>%
group_by(PA,TOD) %>%
summarise("Pop" = sum(Pop)) %>%
ungroup()
pop_by_TOD_PA_chart <- ggplot(pop_by_TOD_PA_data,aes(TOD, PA, fill= Pop)) +
geom_tile() +
scale_fill_distiller(palette = "RdYlBu") +
theme(axis.text.x = element_text(angle = 90)) +
labs(title="Population Heatmap by Type of Dwelling and Planning Area 2020",
x ="Type of Dwelling", y = "Planning Area")+
theme(plot.title = element_text(hjust = 0.5))
pop_by_TOD_PA_chart
by_age_region <- data2020 %>%
select(AG,Region,Pop) %>%
group_by(AG,Region) %>%
summarise("Pop" = sum(Pop)/1000)
by_age_region_chart <- ggplot(by_age_region, aes(x=as.numeric(AG),y=Pop,fill=Region )) +
stat_smooth(
geom = 'area', method = 'loess', span = 1/3,
alpha = 1/2,position = 'identity' )+
scale_fill_brewer(palette="Spectral") +
labs(x = "Age Group", y = "Population(per thousand)", title = "Population by Region and Age 2020")+
theme_classic() +
theme(axis.text.x = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5))
by_age_region_chart
library(ggtern)
#assuming age 0 to 19
data_children <- data2020 %>%
filter(AG %in% c('0_to_4','5_to_9','10_to_14','15_to_19')) %>%
group_by(SZ, Region) %>%
summarise('Children' = sum(Pop))
#assuming age 20 to 59
data_adult <- data2020 %>% filter(AG %in% c('20_to_24', '25_to_29','30_to_34',
'35_to_39','40_to_44','45_to_49',
'50_to_54','55_to_59')) %>%
group_by(SZ, Region) %>%
summarise('Adult' = sum(Pop))
#assuming age 60 and above, according to Senior Citizen Act
data_elderly <- data2020 %>% filter(AG %in% c('60_to_64', '65_to_69','70_to_74',
'75_to_79','80_to_84',
'90_and_over')) %>%
group_by(SZ, Region) %>%
summarise('Elderly'= sum(Pop))
#prep data
ternary_data <- merge(data_children, data_adult,
by=c("SZ", "Region"))
ternary_data <- merge(ternary_data, data_elderly,
by=c("SZ", "Region"))
ternary_chart <- ggtern(data=ternary_data, aes(x=Children,y=Adult, z=Elderly, color=Region)) +
geom_point(alpha=0.5) +
xlab("Children") +
ylab("Adults") +
zlab("Elderly") +
labs(title="Singapore Population Structure, 2020") +
theme(plot.title=element_text(hjust = 0.5),
plot.title.position = "plot",
plot.subtitle=element_text(size=12, margin=margin(b=12))) +
theme_rgbg()
ternary_chart
Ternery Chart
Insight 1
Insight 2
Insight 3
Insight 4