#Load all the packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggiraph)
#Find the place: Find a shape file of the place you plan to make.I got it from here:https://gadm.org/download_country.html. This is a shape file of China. You can also choose the country you are interested in.
CN<-read_sf("chn_adm_ocha_2020_shp","chn_admbnda_adm1_ocha_2020")
head(CN)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 97.56054 ymin: 21.14672 xmax: 122.517 ymax: 39.5806
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 7
##   ADM1_EN                ADM1_ZH ADM1_PCODE ADM0_EN ADM0_ZH ADM0_PCODE
##   <chr>                  <chr>   <chr>      <chr>   <chr>   <chr>     
## 1 Shaanxi Province       陕西省  CN061      China   中国    CN        
## 2 Shanghai Municipality  上海市  CN031      China   中国    CN        
## 3 Chongqing Municipality 重庆市  CN050      China   中国    CN        
## 4 Zhejiang Province      浙江省  CN033      China   中国    CN        
## 5 Jiangxi Province       江西省  CN036      China   中国    CN        
## 6 Yunnan Province        云南省  CN053      China   中国    CN        
## # ℹ 1 more variable: geometry <MULTIPOLYGON [°]>
#Add population: I then found a data set of Chinese's population by province in this website:https://www.statista.com/statistics/279013/population-in-china-by-region/. I imported the xlsx file here.You can also choose other geographical data like crime rate or tourism.
excel_file_path <- "/Users/winnieren/Desktop/Fall_2023/Chinese_pop.xlsx" 
 pop <- read_excel(excel_file_path)
  View(pop)
#Combining the data: Before I merged these two data set, I wanted to make sure that they are matching. Thus, below is the way I checked that.
# Check for non-matching and non-unique values
CN %>% 
    filter(!ADM1_EN %in% CN$ADM1_EN) # Check for non-matching values
## Simple feature collection with 0 features and 6 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 7
## # ℹ 7 variables: ADM1_EN <chr>, ADM1_ZH <chr>, ADM1_PCODE <chr>, ADM0_EN <chr>,
## #   ADM0_ZH <chr>, ADM0_PCODE <chr>, geometry <GEOMETRY [°]>
# Identify duplicate values in both data frames
CN %>% 
    count(ADM1_EN) %>% 
    filter(n > 1) # Check for duplicates
## Simple feature collection with 0 features and 2 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 3
## # ℹ 3 variables: ADM1_EN <chr>, n <int>, geometry <GEOMETRY [°]>
pop %>% 
    count(ADM1_EN) %>% 
    filter(n > 1) # Check for duplicates
## # A tibble: 0 × 2
## # ℹ 2 variables: ADM1_EN <chr>, n <int>
# Handle non-matching or non-unique values before merging
# ...

# Merge the data frames
map_data <- merge(CN,pop, by.x = "ADM1_EN", by.y = "ADM1_EN")
View(map_data)
#Setting up the density classes
map_data$DesClass<-cut(map_data$Pop, breaks =c(0,20,40,60,80,100,Inf),
                       labels=c('<20', '20-40','40-60','60-80','80-100','>100'))
#plotting:I used ggplot and geom_sf_interactive to make the interactive map. You can also try leaflet or geom_polygon.
p <-ggplot()+
  geom_sf_interactive(data=map_data,aes(fill=DesClass,tooltip = paste("Province: ", ADM1_EN, "<br>Population: ", Pop)),color='transparent')+
  geom_sf_interactive(fill='transparent',color="white",data=map_data)+
  scale_fill_viridis_d(name='Million Inhab/km2',
                       guide=guide_legend(
                         direction="horizontal",
                         title.position = 'top',
                         title.hjust = .5,
                         label.hjust = .5,
                         label.position = 'bottom'
                       ))+
  labs(title="China's demographics",
       subtitle = "Population density",
       caption=c('Source:https://www.statista.com/statistics/279013/population-in-china-by-region/'))+
  theme_void()+
  theme(title=element_text(face='bold'),
        legend.position = 'bottom')

girafe(ggobj = p)