#Load all the packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggiraph)
library(ggtext)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#Find the place: Find a shape file of the place you plan to make.I got it from here:https://gadm.org/download_country.html. This is a shape file of China. You can also choose the country you are interested in.
CN<-read_sf("chn_adm_ocha_2020_shp","chn_admbnda_adm1_ocha_2020")
head(CN)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 97.56054 ymin: 21.14672 xmax: 122.517 ymax: 39.5806
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 7
##   ADM1_EN                ADM1_ZH ADM1_PCODE ADM0_EN ADM0_ZH ADM0_PCODE
##   <chr>                  <chr>   <chr>      <chr>   <chr>   <chr>     
## 1 Shaanxi Province       陕西省  CN061      China   中国    CN        
## 2 Shanghai Municipality  上海市  CN031      China   中国    CN        
## 3 Chongqing Municipality 重庆市  CN050      China   中国    CN        
## 4 Zhejiang Province      浙江省  CN033      China   中国    CN        
## 5 Jiangxi Province       江西省  CN036      China   中国    CN        
## 6 Yunnan Province        云南省  CN053      China   中国    CN        
## # ℹ 1 more variable: geometry <MULTIPOLYGON [°]>
#Add population: I then found a data set of Chinese's population by province in this website:https://www.statista.com/statistics/279013/population-in-china-by-region/. I imported the xlsx file here.You can also choose other geographical data like crime rate or tourism.
excel_file_path <- "Chinese_pop.xlsx" 
 pop <- read_excel(excel_file_path)
#Combining the data: Before I merged these two data set, I wanted to make sure that they are matching. Thus, below is the way I checked that.
# Check for non-matching and non-unique values
CN %>% 
    filter(!ADM1_EN %in% CN$ADM1_EN) # Check for non-matching values
## Simple feature collection with 0 features and 6 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 7
## # ℹ 7 variables: ADM1_EN <chr>, ADM1_ZH <chr>, ADM1_PCODE <chr>, ADM0_EN <chr>,
## #   ADM0_ZH <chr>, ADM0_PCODE <chr>, geometry <GEOMETRY [°]>
# Identify duplicate values in both data frames
CN %>% 
    count(ADM1_EN) %>% 
    filter(n > 1) # Check for duplicates
## Simple feature collection with 0 features and 2 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 3
## # ℹ 3 variables: ADM1_EN <chr>, n <int>, geometry <GEOMETRY [°]>
pop %>% 
    count(ADM1_EN) %>% 
    filter(n > 1) # Check for duplicates
## # A tibble: 0 × 2
## # ℹ 2 variables: ADM1_EN <chr>, n <int>
# Handle non-matching or non-unique values before merging
# ...

# Merge the data frames
map_data <- merge(CN,pop, by.x = "ADM1_EN", by.y = "ADM1_EN")
glimpse(map_data)
## Rows: 33
## Columns: 8
## $ ADM1_EN    <chr> "Anhui Province", "Beijing Municipality", "Chongqing Munici…
## $ ADM1_ZH    <chr> "安徽省", "北京市", "重庆市", "福建省", "甘肃省", "广东省",…
## $ ADM1_PCODE <chr> "CN034", "CN011", "CN050", "CN035", "CN062", "CN044", "CN04…
## $ ADM0_EN    <chr> "China", "China", "China", "China", "China", "China", "Chin…
## $ ADM0_ZH    <chr> "中国", "中国", "中国", "中国", "中国", "中国", "中国", "中…
## $ ADM0_PCODE <chr> "CN", "CN", "CN", "CN", "CN", "CN", "CN", "CN", "CN", "CN",…
## $ Pop        <dbl> 61.130, 21.890, 32.120, 41.870, 24.900, 126.840, 50.370, 38…
## $ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((118.8447 30..., MULTIPOLYGON (…
map_data1<-map_data%>%
  select(ADM1_EN,geometry,Pop)
map_data1$DesClass<-cut(map_data1$Pop, breaks =c(0,20,40,60,80,100,Inf),
                       labels=c('<20', '20-40','40-60','60-80','80-100','>100'))
#Setting up the density classes

#plotting:I used ggplot and geom_sf_interactive to make the interactive map. You can also try leaflet or geom_polygon.
p <-ggplot()+
  geom_sf_interactive(data=map_data1,aes(fill=DesClass,tooltip = paste("Province: ", ADM1_EN, "<br>Population: ", Pop)),color='transparent')+
  # geom_sf(data=map_data1,aes(fill=DesClass))+
  geom_sf_interactive(fill='transparent',color="white",data=map_data)+
  scale_fill_viridis_d(name='Million Inhab/km2',
                       guide=guide_legend(
                         direction="horizontal",
                         title.position = 'top',
                         title.hjust = .5,
                         label.hjust = .5,
                         label.position = 'bottom'
                       ),direction = -1)+
  labs(title="China's demographics-Population density",
       subtitle = "Population density in each province in Mainland China",
       caption= paste0('Source:https://www.statista.com/statistics/279013/population-in-china-by-region/'))+
  theme_void()+
  theme(title=element_text(face='bold'),
        legend.position = 'bottom')

girafe(ggobj = p)