#Load all the packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggiraph)
library(ggtext)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#Find the place: Find a shape file of the place you plan to make.I got it from here:https://gadm.org/download_country.html. This is a shape file of China. You can also choose the country you are interested in.
CN<-read_sf("chn_adm_ocha_2020_shp","chn_admbnda_adm1_ocha_2020")
head(CN)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 97.56054 ymin: 21.14672 xmax: 122.517 ymax: 39.5806
## Geodetic CRS: WGS 84
## # A tibble: 6 × 7
## ADM1_EN ADM1_ZH ADM1_PCODE ADM0_EN ADM0_ZH ADM0_PCODE
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Shaanxi Province 陕西省 CN061 China 中国 CN
## 2 Shanghai Municipality 上海市 CN031 China 中国 CN
## 3 Chongqing Municipality 重庆市 CN050 China 中国 CN
## 4 Zhejiang Province 浙江省 CN033 China 中国 CN
## 5 Jiangxi Province 江西省 CN036 China 中国 CN
## 6 Yunnan Province 云南省 CN053 China 中国 CN
## # ℹ 1 more variable: geometry <MULTIPOLYGON [°]>
#Add population: I then found a data set of Chinese's population by province in this website:https://www.statista.com/statistics/279013/population-in-china-by-region/. I imported the xlsx file here.You can also choose other geographical data like crime rate or tourism.
excel_file_path <- "Chinese_pop.xlsx"
pop <- read_excel(excel_file_path)
#Combining the data: Before I merged these two data set, I wanted to make sure that they are matching. Thus, below is the way I checked that.
# Check for non-matching and non-unique values
CN %>%
filter(!ADM1_EN %in% CN$ADM1_EN) # Check for non-matching values
## Simple feature collection with 0 features and 6 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS: WGS 84
## # A tibble: 0 × 7
## # ℹ 7 variables: ADM1_EN <chr>, ADM1_ZH <chr>, ADM1_PCODE <chr>, ADM0_EN <chr>,
## # ADM0_ZH <chr>, ADM0_PCODE <chr>, geometry <GEOMETRY [°]>
# Identify duplicate values in both data frames
CN %>%
count(ADM1_EN) %>%
filter(n > 1) # Check for duplicates
## Simple feature collection with 0 features and 2 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS: WGS 84
## # A tibble: 0 × 3
## # ℹ 3 variables: ADM1_EN <chr>, n <int>, geometry <GEOMETRY [°]>
pop %>%
count(ADM1_EN) %>%
filter(n > 1) # Check for duplicates
## # A tibble: 0 × 2
## # ℹ 2 variables: ADM1_EN <chr>, n <int>
# Handle non-matching or non-unique values before merging
# ...
# Merge the data frames
map_data <- merge(CN,pop, by.x = "ADM1_EN", by.y = "ADM1_EN")
glimpse(map_data)
## Rows: 33
## Columns: 8
## $ ADM1_EN <chr> "Anhui Province", "Beijing Municipality", "Chongqing Munici…
## $ ADM1_ZH <chr> "安徽省", "北京市", "重庆市", "福建省", "甘肃省", "广东省",…
## $ ADM1_PCODE <chr> "CN034", "CN011", "CN050", "CN035", "CN062", "CN044", "CN04…
## $ ADM0_EN <chr> "China", "China", "China", "China", "China", "China", "Chin…
## $ ADM0_ZH <chr> "中国", "中国", "中国", "中国", "中国", "中国", "中国", "中…
## $ ADM0_PCODE <chr> "CN", "CN", "CN", "CN", "CN", "CN", "CN", "CN", "CN", "CN",…
## $ Pop <dbl> 61.130, 21.890, 32.120, 41.870, 24.900, 126.840, 50.370, 38…
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((118.8447 30..., MULTIPOLYGON (…
map_data1<-map_data%>%
select(ADM1_EN,geometry,Pop)
map_data1$DesClass<-cut(map_data1$Pop, breaks =c(0,20,40,60,80,100,Inf),
labels=c('<20', '20-40','40-60','60-80','80-100','>100'))
#Setting up the density classes
#plotting:I used ggplot and geom_sf_interactive to make the interactive map. You can also try leaflet or geom_polygon.
p <-ggplot()+
geom_sf_interactive(data=map_data1,aes(fill=DesClass,tooltip = paste("Province: ", ADM1_EN, "<br>Population: ", Pop)),color='transparent')+
# geom_sf(data=map_data1,aes(fill=DesClass))+
geom_sf_interactive(fill='transparent',color="white",data=map_data)+
scale_fill_viridis_d(name='Million Inhab/km2',
guide=guide_legend(
direction="horizontal",
title.position = 'top',
title.hjust = .5,
label.hjust = .5,
label.position = 'bottom'
),direction = -1)+
labs(title="China's demographics-Population density",
subtitle = "Population density in each province in Mainland China",
caption= paste0('Source:https://www.statista.com/statistics/279013/population-in-china-by-region/'))+
theme_void()+
theme(title=element_text(face='bold'),
legend.position = 'bottom')
girafe(ggobj = p)